# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s

# Kernels can have no FP

# Entry function whose buffer load takes its resource from a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15 (not the scratchRSrcReg declared below) and has
# an S_MOV_B32 0 result in the operand slot the test name calls soffset.
# The assertions expect %stack.0 to stay in its V_MOV_B32_e32 instead of being
# folded into the load, while the S_MOV_B32 0 is replaced by an immediate 0.
---
name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %1:sreg_32_xm0 = S_MOV_B32 0
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Entry function whose buffer load takes its resource from a copy of
# $sgpr12_sgpr13_sgpr14_sgpr15 (not the scratchRSrcReg declared below); every
# operand after the resource is already an immediate 0 in the input.
# The assertions expect the instruction sequence to come out unchanged, i.e.
# %stack.0 stays in its V_MOV_B32_e32 and is not folded into the load.
---
name: kernel_no_fold_fi_non_stack_rsrc
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Entry function whose OFFEN store and load use the declared scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as resource, but carry a virtual register defined
# by S_MOV_B32 0 in the operand slot the test name calls soffset.
# The assertions expect %stack.0 to stay in its V_MOV_B32_e32 for both memory
# instructions, while the S_MOV_B32 0 is replaced by an immediate 0.
---
name: kernel_no_fold_fi_non_stack_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    %2:sreg_32_xm0 = S_MOV_B32 0

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    S_ENDPGM 0, implicit $vgpr0

...

# Entry function whose OFFEN store and load use the declared scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as resource and an immediate 0 in the operand
# that follows it.
# The assertions expect %stack.0 to be placed directly in the address operand
# of both memory instructions, and the V_MOV_B32_e32 that materialized
# %stack.0 to be gone from the output.
---
name: kernel_fold_fi_mubuf
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: kernel_fold_fi_mubuf
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...


# Functions have an unswizzled SP/FP relative to the wave offset

# Non-entry function (isEntryFunction: false) whose buffer load takes its
# resource from a copy of $sgpr12_sgpr13_sgpr14_sgpr15 (not the scratchRSrcReg
# declared below) and has an S_MOV_B32 0 result in the operand slot the test
# name calls soffset.
# The assertions expect %stack.0 to stay in its V_MOV_B32_e32 instead of being
# folded into the load, while the S_MOV_B32 0 is replaced by an immediate 0.
---
name: function_no_fold_fi_non_stack_rsrc_and_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc_and_soffset
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %1:sreg_32_xm0 = S_MOV_B32 0
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Non-entry function (isEntryFunction: false) whose buffer load takes its
# resource from a copy of $sgpr12_sgpr13_sgpr14_sgpr15 (not the scratchRSrcReg
# declared below); every operand after the resource is already an immediate 0.
# The assertions expect the instruction sequence to come out unchanged, i.e.
# %stack.0 stays in its V_MOV_B32_e32 and is not folded into the load.
---
name: function_no_fold_fi_non_stack_rsrc
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: {{ $}}
    ; GCN-NEXT: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...

# Non-entry function (isEntryFunction: false) whose OFFEN store and load use
# the declared scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) as resource and an
# immediate 0 in the operand that follows it.
# The assertions expect %stack.0 to be placed directly in the address operand
# of both memory instructions, and the V_MOV_B32_e32 that materialized
# %stack.0 to be gone from the output.
# NOTE(review): the test name says "no_fold" and "non_stack_soffset", yet the
# input has no register in the soffset slot and the expected output does fold
# the frame index -- confirm whether the name or the input is stale.
---
name: function_no_fold_fi_non_stack_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...

# Non-entry function (isEntryFunction: false) whose OFFEN store and load use
# the declared scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) as resource and an
# immediate 0 in the operand that follows it.
# The assertions expect %stack.0 to be placed directly in the address operand
# of both memory instructions, and the V_MOV_B32_e32 that materialized
# %stack.0 to be gone from the output.
# NOTE(review): apart from its name, this test is identical to
# function_fold_fi_mubuf_stack_relative in this file -- confirm whether the
# two were meant to use different soffset operands.
---
name: function_fold_fi_mubuf_wave_relative
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...

# Non-entry function (isEntryFunction: false) whose OFFEN store and load use
# the declared scratchRSrcReg ($sgpr0_sgpr1_sgpr2_sgpr3) as resource and an
# immediate 0 in the operand that follows it.
# The assertions expect %stack.0 to be placed directly in the address operand
# of both memory instructions, and the V_MOV_B32_e32 that materialized
# %stack.0 to be gone from the output.
# NOTE(review): apart from its name, this test is identical to
# function_fold_fi_mubuf_wave_relative in this file -- confirm whether the
# two were meant to use different soffset operands.
---
name: function_fold_fi_mubuf_stack_relative
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    ; GCN-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...