# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -o - %s -run-pass=greedy -run-pass=amdgpu-mark-last-scratch-load -verify-machineinstrs | FileCheck -check-prefix=CHECK %s
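# The amdgpu-mark-last-scratch-load pass runs here after greedy regalloc and
# tags the final reload from each scratch slot with an "amdgpu-last-use"
# memory-operand flag (visible in the CHECK lines below); on gfx12 this is
# expected to let later lowering pick a last-use cache policy for the load.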

--- |
  define amdgpu_cs void @test_spill_12x32() "amdgpu-num-vgpr"="12" {
    ret void
  }
  define amdgpu_cs void @test_spill_384() "amdgpu-num-vgpr"="12" {
    ret void
  }
  define amdgpu_ps void @test_loop_12() "amdgpu-num-vgpr"="12" {
    ret void
  }
...
---
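# All twelve spill slots are reloaded exactly once after the clobber, so
# every SI_SPILL_V32_RESTORE below should carry the "amdgpu-last-use" flag.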
name: test_spill_12x32
tracksRegLiveness: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
body:             |
  bb.0:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11

    ; CHECK-LABEL: name: test_spill_12x32
    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr1, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr2, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr3, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr4, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr5, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr6, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr7, %stack.7, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr8, %stack.8, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr9, %stack.9, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr10, %stack.10, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
    ; CHECK-NEXT: SI_SPILL_V32_SAVE $vgpr11, %stack.11, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
    ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.0, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.1, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.2, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE3:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.3, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE4:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.4, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE5:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.5, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE6:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.6, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE7:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.7, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE8:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.8, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE9:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.9, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE10:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.10, addrspace 5)
    ; CHECK-NEXT: [[SI_SPILL_V32_RESTORE11:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.11, addrspace 5)
    ; CHECK-NEXT: S_ENDPGM 0, implicit [[SI_SPILL_V32_RESTORE]], implicit [[SI_SPILL_V32_RESTORE1]], implicit [[SI_SPILL_V32_RESTORE2]], implicit [[SI_SPILL_V32_RESTORE3]], implicit [[SI_SPILL_V32_RESTORE4]], implicit [[SI_SPILL_V32_RESTORE5]], implicit [[SI_SPILL_V32_RESTORE6]], implicit [[SI_SPILL_V32_RESTORE7]], implicit [[SI_SPILL_V32_RESTORE8]], implicit [[SI_SPILL_V32_RESTORE9]], implicit [[SI_SPILL_V32_RESTORE10]], implicit [[SI_SPILL_V32_RESTORE11]]
    %0:vgpr_32 = COPY $vgpr0
    %1:vgpr_32 = COPY $vgpr1
    %2:vgpr_32 = COPY $vgpr2
    %3:vgpr_32 = COPY $vgpr3
    %4:vgpr_32 = COPY $vgpr4
    %5:vgpr_32 = COPY $vgpr5
    %6:vgpr_32 = COPY $vgpr6
    %7:vgpr_32 = COPY $vgpr7
    %8:vgpr_32 = COPY $vgpr8
    %9:vgpr_32 = COPY $vgpr9
    %10:vgpr_32 = COPY $vgpr10
    %11:vgpr_32 = COPY $vgpr11
    INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    S_ENDPGM 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11
...

---
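# A single 384-bit tuple is spilled and reloaded once, so the lone
# SI_SPILL_V384_RESTORE should be marked "amdgpu-last-use".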
name: test_spill_384
tracksRegLiveness: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
body:             |
  bb.0:
    liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11

    ; CHECK-LABEL: name: test_spill_384
    ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: SI_SPILL_V384_SAVE $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11, %stack.0, $sgpr32, 0, implicit $exec :: (store (s384) into %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    ; CHECK-NEXT: [[SI_SPILL_V384_RESTORE:%[0-9]+]]:vreg_384 = SI_SPILL_V384_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s384) from %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: S_ENDPGM 0, implicit [[SI_SPILL_V384_RESTORE]]
    %0:vreg_384 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11
    S_ENDPGM 0, implicit %0
...

---
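# Reloads inside the loop body (%bb.3) are repeated on every iteration and
# must stay unmarked; only the truly final reloads (from %stack.14 in the
# loop and %stack.16 in the exit block) should carry "amdgpu-last-use".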
name: test_loop_12
tracksRegLiveness: true
machineFunctionInfo:
  stackPtrOffsetReg: '$sgpr32'
body:             |
  ; CHECK-LABEL: name: test_loop_12
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr12, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr11, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr10, %stack.2, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr9, %stack.3, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr8, %stack.4, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr7, %stack.5, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr6, %stack.6, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr5, %stack.7, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr4, %stack.8, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr3, %stack.9, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr2, %stack.10, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr1, %stack.11, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE $vgpr0, %stack.12, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_3]], %stack.16, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
  ; CHECK-NEXT:   %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
  ; CHECK-NEXT:   %vcmp:sreg_32 = V_CMP_LT_I32_e64 0, [[SI_SPILL_V32_RESTORE]], implicit $exec
  ; CHECK-NEXT:   %mask:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
  ; CHECK-NEXT:   %sand:sreg_32 = S_AND_B32 %mask, %vcmp, implicit-def dead $scc
  ; CHECK-NEXT:   $exec_lo = S_MOV_B32_term %sand
  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   %mask2:sgpr_32 = S_MOV_B32 0
  ; CHECK-NEXT:   %count:sgpr_32 = S_MOV_B32 0
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.3
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.5(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, %mask, implicit-def $scc
  ; CHECK-NEXT:   S_BRANCH %bb.5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.3:
  ; CHECK-NEXT:   successors: %bb.4(0x04000000), %bb.3(0x7c000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_1]], %stack.14, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_]], 0, [[SI_SPILL_V32_RESTORE1]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.14, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.14, addrspace 5)
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_]], %stack.13, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_1]], 0, [[SI_SPILL_V32_RESTORE2]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE3:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_2]], 0, [[SI_SPILL_V32_RESTORE3]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_2]], %stack.15, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE4:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
  ; CHECK-NEXT:   [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_F32_e64 0, [[V_MOV_B32_e32_4]], 0, [[SI_SPILL_V32_RESTORE4]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE5:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
  ; CHECK-NEXT:   %res5:vgpr_32 = V_ADD_F32_e64 0, %res5, 0, [[SI_SPILL_V32_RESTORE5]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE6:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
  ; CHECK-NEXT:   %res6:vgpr_32 = V_ADD_F32_e64 0, %res6, 0, [[SI_SPILL_V32_RESTORE6]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE7:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
  ; CHECK-NEXT:   %res7:vgpr_32 = V_ADD_F32_e64 0, %res7, 0, [[SI_SPILL_V32_RESTORE7]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE8:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
  ; CHECK-NEXT:   %res8:vgpr_32 = V_ADD_F32_e64 0, %res8, 0, [[SI_SPILL_V32_RESTORE8]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE9:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
  ; CHECK-NEXT:   %res9:vgpr_32 = V_ADD_F32_e64 0, %res9, 0, [[SI_SPILL_V32_RESTORE9]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE10:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
  ; CHECK-NEXT:   %res10:vgpr_32 = V_ADD_F32_e64 0, %res10, 0, [[SI_SPILL_V32_RESTORE10]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE11:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
  ; CHECK-NEXT:   %res11:vgpr_32 = V_ADD_F32_e64 0, %res11, 0, [[SI_SPILL_V32_RESTORE11]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE12:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
  ; CHECK-NEXT:   %res12:vgpr_32 = V_ADD_F32_e64 0, %res12, 0, [[SI_SPILL_V32_RESTORE12]], 0, 0, implicit $mode, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.13, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
  ; CHECK-NEXT:   %count:sgpr_32 = nuw nsw S_ADD_I32 %count, 1, implicit-def dead $scc
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE13:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.12, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
  ; CHECK-NEXT:   %vcmp2:sreg_32 = V_CMP_GE_I32_e64 %count, [[SI_SPILL_V32_RESTORE13]], implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.15, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
  ; CHECK-NEXT:   %mask2:sgpr_32 = S_OR_B32 %vcmp2, %mask2, implicit-def $scc
  ; CHECK-NEXT:   $exec_lo = S_ANDN2_B32_term $exec_lo, %mask2, implicit-def $scc
  ; CHECK-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.4
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.4:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, %mask2, implicit-def $scc
  ; CHECK-NEXT:   SI_SPILL_V32_SAVE [[V_MOV_B32_e32_4]], %stack.16, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.5:
  ; CHECK-NEXT:   [[SI_SPILL_V32_RESTORE14:%[0-9]+]]:vgpr_32 = SI_SPILL_V32_RESTORE %stack.16, $sgpr32, 0, implicit $exec :: ("amdgpu-last-use" load (s32) from %stack.16, addrspace 5)
  ; CHECK-NEXT:   EXP_DONE 0, [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_2]], [[SI_SPILL_V32_RESTORE14]], -1, 0, 15, implicit $exec
  ; CHECK-NEXT:   EXP_DONE 0, %res5, %res6, %res7, %res8, -1, 0, 15, implicit $exec
  ; CHECK-NEXT:   EXP_DONE 0, %res9, %res10, %res11, %res12, -1, 0, 15, implicit $exec
  ; CHECK-NEXT:   S_ENDPGM 0
  bb.0: ; entry
    successors: %bb.1(0x40000000), %bb.2(0x40000000)
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12

    %12:vgpr_32 = COPY $vgpr12
    %11:vgpr_32 = COPY $vgpr11
    %10:vgpr_32 = COPY $vgpr10
    %9:vgpr_32 = COPY $vgpr9
    %8:vgpr_32 = COPY $vgpr8
    %7:vgpr_32 = COPY $vgpr7
    %6:vgpr_32 = COPY $vgpr6
    %5:vgpr_32 = COPY $vgpr5
    %4:vgpr_32 = COPY $vgpr4
    %3:vgpr_32 = COPY $vgpr3
    %2:vgpr_32 = COPY $vgpr2
    %1:vgpr_32 = COPY $vgpr1
    %loop_end:vgpr_32 = COPY $vgpr0
    %res1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %vcmp:sreg_32 = V_CMP_LT_I32_e64 0, %loop_end, implicit $exec
    %mask:sreg_32 = COPY $exec_lo, implicit-def $exec_lo
    %sand:sreg_32 = S_AND_B32 %mask, %vcmp, implicit-def dead $scc
    $exec_lo = S_MOV_B32_term %sand
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1: ; loop preheader
    successors: %bb.3(0x80000000)

    %mask2:sgpr_32 = S_MOV_B32 0
    %count:sgpr_32 = S_MOV_B32 0
    %res1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res6:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res7:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res9:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res10:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    %res12:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.3

  bb.2: ; flow
    successors: %bb.5(0x80000000)

    $exec_lo = S_OR_B32 $exec_lo, %mask, implicit-def $scc
    S_BRANCH %bb.5

  bb.3: ; loop
    successors: %bb.4(0x04000000), %bb.3(0x7c000000)

    %res1:vgpr_32 = V_ADD_F32_e64 0, %res1, 0, %1, 0, 0, implicit $mode, implicit $exec
    %res2:vgpr_32 = V_ADD_F32_e64 0, %res2, 0, %2, 0, 0, implicit $mode, implicit $exec
    %res3:vgpr_32 = V_ADD_F32_e64 0, %res3, 0, %3, 0, 0, implicit $mode, implicit $exec
    %res4:vgpr_32 = V_ADD_F32_e64 0, %res4, 0, %4, 0, 0, implicit $mode, implicit $exec
    %res5:vgpr_32 = V_ADD_F32_e64 0, %res5, 0, %5, 0, 0, implicit $mode, implicit $exec
    %res6:vgpr_32 = V_ADD_F32_e64 0, %res6, 0, %6, 0, 0, implicit $mode, implicit $exec
    %res7:vgpr_32 = V_ADD_F32_e64 0, %res7, 0, %7, 0, 0, implicit $mode, implicit $exec
    %res8:vgpr_32 = V_ADD_F32_e64 0, %res8, 0, %8, 0, 0, implicit $mode, implicit $exec
    %res9:vgpr_32 = V_ADD_F32_e64 0, %res9, 0, %9, 0, 0, implicit $mode, implicit $exec
    %res10:vgpr_32 = V_ADD_F32_e64 0, %res10, 0, %10, 0, 0, implicit $mode, implicit $exec
    %res11:vgpr_32 = V_ADD_F32_e64 0, %res11, 0, %11, 0, 0, implicit $mode, implicit $exec
    %res12:vgpr_32 = V_ADD_F32_e64 0, %res12, 0, %12, 0, 0, implicit $mode, implicit $exec
    %count:sgpr_32 = nuw nsw S_ADD_I32 %count, 1, implicit-def dead $scc
    %vcmp2:sreg_32 = V_CMP_GE_I32_e64 %count, %loop_end, implicit $exec
    %mask2:sgpr_32 = S_OR_B32 %vcmp2, %mask2, implicit-def $scc
    $exec_lo = S_ANDN2_B32_term $exec_lo, %mask2, implicit-def $scc
    S_CBRANCH_EXECNZ %bb.3, implicit $exec
    S_BRANCH %bb.4

  bb.4: ; flow
    successors: %bb.2(0x80000000)

    $exec_lo = S_OR_B32 $exec_lo, %mask2, implicit-def $scc
    S_BRANCH %bb.2

  bb.5: ; exit
    EXP_DONE 0, %res1, %res2, %res3, %res4, -1, 0, 15, implicit $exec
    EXP_DONE 0, %res5, %res6, %res7, %res8, -1, 0, 15, implicit $exec
    EXP_DONE 0, %res9, %res10, %res11, %res12, -1, 0, 15, implicit $exec
    S_ENDPGM 0

...