# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -verify-regalloc -run-pass=greedy %s -o - | FileCheck %s
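# Stresses live-range splitting (SplitKit) of copies whose subregister lanes are only
# partially live under the greedy register allocator; the CHECK lines below are the
# autogenerated expected output, including the partial spills/restores of split ranges.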

---
name: zextload_global_v64i16_to_v64i64
tracksRegLiveness: true
machineFunctionInfo:
  scratchRSrcReg:  '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
body:             |
  bb.0:
    liveins: $sgpr0_sgpr1

    ; CHECK-LABEL: name: zextload_global_v64i16_to_v64i64
    ; CHECK: liveins: $sgpr0_sgpr1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_64(p4) = COPY $sgpr0_sgpr1
    ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]](p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4)
    ; CHECK-NEXT: undef [[S_MOV_B32_:%[0-9]+]].sub3:sgpr_128 = S_MOV_B32 61440
    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub2:sgpr_128 = S_MOV_B32 -1
    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub0
    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]].sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub1
    ; CHECK-NEXT: undef [[COPY1:%[0-9]+]].sub0:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub2
    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub1:sgpr_128 = COPY [[S_LOAD_DWORDX4_IMM]].sub3
    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub2:sgpr_128 = COPY [[S_MOV_B32_]].sub2
    ; CHECK-NEXT: [[COPY1:%[0-9]+]].sub3:sgpr_128 = COPY [[S_MOV_B32_]].sub3
    ; CHECK-NEXT: early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE [[COPY1]], implicit $exec {
    ; CHECK-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 0, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1)
    ; CHECK-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET1:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 16, 0, 0, implicit $exec :: (load (s128), addrspace 1)
    ; CHECK-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET2:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 32, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
    ; CHECK-NEXT:   [[BUFFER_LOAD_DWORDX4_OFFSET3:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 48, 0, 0, implicit $exec :: (load (s128), addrspace 1)
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_1:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_2:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_3:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_4:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_5:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_6:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_7:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_8:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_9:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_11:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_12:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_13:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_14:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_15:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec
    ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET4:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 64, 0, 0, implicit $exec :: (load (s128), align 64, addrspace 1)
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_16:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_17:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_18:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_19:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec
    ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET5:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 80, 0, 0, implicit $exec :: (load (s128), addrspace 1)
    ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET6:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 96, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_21:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_22:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec
    ; CHECK-NEXT: [[BUFFER_LOAD_DWORDX4_OFFSET7:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET [[COPY1]], 0, 112, 0, 0, implicit $exec :: (load (s128), addrspace 1)
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_23:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_24:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_25:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_26:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec
    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
    ; CHECK-NEXT: undef [[COPY2:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_]].sub2
    ; CHECK-NEXT: [[COPY2:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY3:%[0-9]+]].sub0:vreg_128 = COPY [[COPY2]].sub0 {
    ; CHECK-NEXT:   internal [[COPY3]].sub2:vreg_128 = COPY [[COPY2]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY3]], %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY4:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_1]].sub2
    ; CHECK-NEXT: [[COPY4:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[COPY5:%[0-9]+]].sub0:vreg_128 = COPY [[COPY4]].sub0 {
    ; CHECK-NEXT:   internal [[COPY5]].sub2:vreg_128 = COPY [[COPY4]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY5]], %stack.1, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.1, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY6:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_2]].sub2
    ; CHECK-NEXT: [[COPY6:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[COPY7:%[0-9]+]].sub0:vreg_128 = COPY [[COPY6]].sub0 {
    ; CHECK-NEXT:   internal [[COPY7]].sub2:vreg_128 = COPY [[COPY6]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY7]], %stack.2, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.2, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY8:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_3]].sub2
    ; CHECK-NEXT: [[COPY8:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[COPY9:%[0-9]+]].sub0:vreg_128 = COPY [[COPY8]].sub0 {
    ; CHECK-NEXT:   internal [[COPY9]].sub2:vreg_128 = COPY [[COPY8]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY9]], %stack.3, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.3, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY10:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_4]].sub2
    ; CHECK-NEXT: [[COPY10:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY11:%[0-9]+]].sub0:vreg_128 = COPY [[COPY10]].sub0 {
    ; CHECK-NEXT:   internal [[COPY11]].sub2:vreg_128 = COPY [[COPY10]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY11]], %stack.4, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.4, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY12:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_5]].sub2
    ; CHECK-NEXT: [[COPY12:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[COPY13:%[0-9]+]].sub0:vreg_128 = COPY [[COPY12]].sub0 {
    ; CHECK-NEXT:   internal [[COPY13]].sub2:vreg_128 = COPY [[COPY12]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY13]], %stack.5, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.5, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY14:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_6]].sub2
    ; CHECK-NEXT: [[COPY14:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[COPY15:%[0-9]+]].sub0:vreg_128 = COPY [[COPY14]].sub0 {
    ; CHECK-NEXT:   internal [[COPY15]].sub2:vreg_128 = COPY [[COPY14]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY15]], %stack.7, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.7, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY16:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_7]].sub2
    ; CHECK-NEXT: [[COPY16:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET1]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[COPY17:%[0-9]+]].sub0:vreg_128 = COPY [[COPY16]].sub0 {
    ; CHECK-NEXT:   internal [[COPY17]].sub2:vreg_128 = COPY [[COPY16]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY17]], %stack.6, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.6, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY18:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_8]].sub2
    ; CHECK-NEXT: [[COPY18:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY19:%[0-9]+]].sub0:vreg_128 = COPY [[COPY18]].sub0 {
    ; CHECK-NEXT:   internal [[COPY19]].sub2:vreg_128 = COPY [[COPY18]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY20:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_9]].sub2
    ; CHECK-NEXT: [[COPY20:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[COPY21:%[0-9]+]].sub0:vreg_128 = COPY [[COPY20]].sub0 {
    ; CHECK-NEXT:   internal [[COPY21]].sub2:vreg_128 = COPY [[COPY20]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_10:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[COPY22:%[0-9]+]].sub0:vreg_128 = COPY [[V_LSHRREV_B32_e32_10]].sub0 {
    ; CHECK-NEXT:   internal [[COPY22]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_10]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY23:%[0-9]+]].sub0:vreg_128 = COPY [[COPY22]].sub0 {
    ; CHECK-NEXT:   internal [[COPY23]].sub2:vreg_128 = COPY [[COPY22]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY23]], %stack.8, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.8, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY24:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_11]].sub2
    ; CHECK-NEXT: [[COPY24:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET2]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[COPY25:%[0-9]+]].sub0:vreg_128 = COPY [[COPY24]].sub0 {
    ; CHECK-NEXT:   internal [[COPY25]].sub2:vreg_128 = COPY [[COPY24]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY25]], %stack.11, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.11, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY26:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_12]].sub2
    ; CHECK-NEXT: [[COPY26:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY27:%[0-9]+]].sub0:vreg_128 = COPY [[COPY26]].sub0 {
    ; CHECK-NEXT:   internal [[COPY27]].sub2:vreg_128 = COPY [[COPY26]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY27]], %stack.9, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.9, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY28:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_13]].sub2
    ; CHECK-NEXT: [[COPY28:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[COPY29:%[0-9]+]].sub0:vreg_128 = COPY [[COPY28]].sub0 {
    ; CHECK-NEXT:   internal [[COPY29]].sub2:vreg_128 = COPY [[COPY28]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY30:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_14]].sub2
    ; CHECK-NEXT: [[COPY30:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[COPY31:%[0-9]+]].sub0:vreg_128 = COPY [[COPY30]].sub0 {
    ; CHECK-NEXT:   internal [[COPY31]].sub2:vreg_128 = COPY [[COPY30]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: SI_SPILL_V128_SAVE [[COPY31]], %stack.10, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.10, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY32:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_15]].sub2
    ; CHECK-NEXT: [[COPY32:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET3]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[COPY33:%[0-9]+]].sub0:vreg_128 = COPY [[COPY32]].sub0 {
    ; CHECK-NEXT:   internal [[COPY33]].sub2:vreg_128 = COPY [[COPY32]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY34:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_16]].sub2
    ; CHECK-NEXT: [[COPY34:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY35:%[0-9]+]].sub0:vreg_128 = COPY [[COPY34]].sub0 {
    ; CHECK-NEXT:   internal [[COPY35]].sub2:vreg_128 = COPY [[COPY34]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY36:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_17]].sub2
    ; CHECK-NEXT: [[COPY36:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[COPY37:%[0-9]+]].sub0:vreg_128 = COPY [[COPY36]].sub0 {
    ; CHECK-NEXT:   internal [[COPY37]].sub2:vreg_128 = COPY [[COPY36]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY38:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_18]].sub2
    ; CHECK-NEXT: [[COPY38:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[COPY39:%[0-9]+]].sub0:vreg_128 = COPY [[COPY38]].sub0 {
    ; CHECK-NEXT:   internal [[COPY39]].sub2:vreg_128 = COPY [[COPY38]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY40:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_19]].sub2
    ; CHECK-NEXT: [[COPY40:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET4]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[COPY41:%[0-9]+]].sub0:vreg_128 = COPY [[COPY40]].sub0 {
    ; CHECK-NEXT:   internal [[COPY41]].sub2:vreg_128 = COPY [[COPY40]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_27:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY42:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_27]].sub2
    ; CHECK-NEXT: [[COPY42:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY43:%[0-9]+]].sub0:vreg_128 = COPY [[COPY42]].sub0 {
    ; CHECK-NEXT:   internal [[COPY43]].sub2:vreg_128 = COPY [[COPY42]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_28:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[COPY44:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_28]].sub2
    ; CHECK-NEXT: [[COPY44:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub0, implicit $exec
    ; CHECK-NEXT: undef [[COPY45:%[0-9]+]].sub0:vreg_128 = COPY [[COPY44]].sub0 {
    ; CHECK-NEXT:   internal [[COPY45]].sub2:vreg_128 = COPY [[COPY44]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_29:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[COPY46:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_29]].sub2
    ; CHECK-NEXT: [[COPY46:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub3, implicit $exec
    ; CHECK-NEXT: undef [[COPY47:%[0-9]+]].sub0:vreg_128 = COPY [[COPY46]].sub0 {
    ; CHECK-NEXT:   internal [[COPY47]].sub2:vreg_128 = COPY [[COPY46]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_30:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[COPY48:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_30]].sub2
    ; CHECK-NEXT: [[COPY48:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET5]].sub2, implicit $exec
    ; CHECK-NEXT: undef [[V_LSHRREV_B32_e32_31:%[0-9]+]].sub2:vreg_128 = V_LSHRREV_B32_e32 16, [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
    ; CHECK-NEXT: undef [[COPY49:%[0-9]+]].sub2:vreg_128 = COPY [[V_LSHRREV_B32_e32_31]].sub2
    ; CHECK-NEXT: [[COPY49:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub1, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub0, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_21:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub3, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_22:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET6]].sub2, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_23:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub1, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_24:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub0, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_25:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub3, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_26:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e32 [[S_MOV_B32_1]], [[BUFFER_LOAD_DWORDX4_OFFSET7]].sub2, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_26:%[0-9]+]].sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_26:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_26]], [[S_MOV_B32_]], 0, 480, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_25:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_25:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_25]], [[S_MOV_B32_]], 0, 496, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_24:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_24:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_24]], [[S_MOV_B32_]], 0, 448, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_23:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_23:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_23]], [[S_MOV_B32_]], 0, 464, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_22:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_22:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_22]], [[S_MOV_B32_]], 0, 416, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_21:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_21:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_21]], [[S_MOV_B32_]], 0, 432, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[V_LSHRREV_B32_e32_20:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[V_LSHRREV_B32_e32_20]], [[S_MOV_B32_]], 0, 384, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
    ; CHECK-NEXT: undef [[COPY50:%[0-9]+]].sub0:vreg_128 = COPY [[COPY49]].sub0 {
    ; CHECK-NEXT:   internal [[COPY50]].sub2:vreg_128 = COPY [[COPY49]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY50:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY50]], [[S_MOV_B32_]], 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: undef [[COPY51:%[0-9]+]].sub0:vreg_128 = COPY [[COPY48]].sub0 {
    ; CHECK-NEXT:   internal [[COPY51]].sub2:vreg_128 = COPY [[COPY48]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY51:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY51:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY51]], [[S_MOV_B32_]], 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: undef [[COPY52:%[0-9]+]].sub0:vreg_128 = COPY [[COPY47]].sub0 {
    ; CHECK-NEXT:   internal [[COPY52]].sub2:vreg_128 = COPY [[COPY47]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY53:%[0-9]+]].sub0:vreg_128 = COPY [[COPY52]].sub0 {
    ; CHECK-NEXT:   internal [[COPY53]].sub2:vreg_128 = COPY [[COPY52]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY53:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY53:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY53]], [[S_MOV_B32_]], 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: undef [[COPY54:%[0-9]+]].sub0:vreg_128 = COPY [[COPY45]].sub0 {
    ; CHECK-NEXT:   internal [[COPY54]].sub2:vreg_128 = COPY [[COPY45]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY55:%[0-9]+]].sub0:vreg_128 = COPY [[COPY54]].sub0 {
    ; CHECK-NEXT:   internal [[COPY55]].sub2:vreg_128 = COPY [[COPY54]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY55:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY55:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY55]], [[S_MOV_B32_]], 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    ; CHECK-NEXT: undef [[COPY56:%[0-9]+]].sub0:vreg_128 = COPY [[COPY43]].sub0 {
    ; CHECK-NEXT:   internal [[COPY56]].sub2:vreg_128 = COPY [[COPY43]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY57:%[0-9]+]].sub0:vreg_128 = COPY [[COPY56]].sub0 {
    ; CHECK-NEXT:   internal [[COPY57]].sub2:vreg_128 = COPY [[COPY56]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY57:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY57:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY57]], [[S_MOV_B32_]], 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: undef [[COPY58:%[0-9]+]].sub0:vreg_128 = COPY [[COPY41]].sub0 {
    ; CHECK-NEXT:   internal [[COPY58]].sub2:vreg_128 = COPY [[COPY41]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY59:%[0-9]+]].sub0:vreg_128 = COPY [[COPY58]].sub0 {
    ; CHECK-NEXT:   internal [[COPY59]].sub2:vreg_128 = COPY [[COPY58]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY59:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY59]], [[S_MOV_B32_]], 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: undef [[COPY60:%[0-9]+]].sub0:vreg_128 = COPY [[COPY39]].sub0 {
    ; CHECK-NEXT:   internal [[COPY60]].sub2:vreg_128 = COPY [[COPY39]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY61:%[0-9]+]].sub0:vreg_128 = COPY [[COPY60]].sub0 {
    ; CHECK-NEXT:   internal [[COPY61]].sub2:vreg_128 = COPY [[COPY60]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY61:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY61]], [[S_MOV_B32_]], 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: undef [[COPY62:%[0-9]+]].sub0:vreg_128 = COPY [[COPY37]].sub0 {
    ; CHECK-NEXT:   internal [[COPY62]].sub2:vreg_128 = COPY [[COPY37]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY63:%[0-9]+]].sub0:vreg_128 = COPY [[COPY62]].sub0 {
    ; CHECK-NEXT:   internal [[COPY63]].sub2:vreg_128 = COPY [[COPY62]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY63:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY63]], [[S_MOV_B32_]], 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1)
    ; CHECK-NEXT: undef [[COPY64:%[0-9]+]].sub0:vreg_128 = COPY [[COPY35]].sub0 {
    ; CHECK-NEXT:   internal [[COPY64]].sub2:vreg_128 = COPY [[COPY35]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY65:%[0-9]+]].sub0:vreg_128 = COPY [[COPY64]].sub0 {
    ; CHECK-NEXT:   internal [[COPY65]].sub2:vreg_128 = COPY [[COPY64]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY65:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY65]], [[S_MOV_B32_]], 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: undef [[COPY66:%[0-9]+]].sub0:vreg_128 = COPY [[COPY33]].sub0 {
    ; CHECK-NEXT:   internal [[COPY66]].sub2:vreg_128 = COPY [[COPY33]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY67:%[0-9]+]].sub0:vreg_128 = COPY [[COPY66]].sub0 {
    ; CHECK-NEXT:   internal [[COPY67]].sub2:vreg_128 = COPY [[COPY66]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY67:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY67]], [[S_MOV_B32_]], 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.10, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.10, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY68:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub0 {
    ; CHECK-NEXT:   internal [[COPY68]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY69:%[0-9]+]].sub0:vreg_128 = COPY [[COPY68]].sub0 {
    ; CHECK-NEXT:   internal [[COPY69]].sub2:vreg_128 = COPY [[COPY68]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY69:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY69]], [[S_MOV_B32_]], 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: undef [[COPY70:%[0-9]+]].sub0:vreg_128 = COPY [[COPY29]].sub0 {
    ; CHECK-NEXT:   internal [[COPY70]].sub2:vreg_128 = COPY [[COPY29]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY71:%[0-9]+]].sub0:vreg_128 = COPY [[COPY70]].sub0 {
    ; CHECK-NEXT:   internal [[COPY71]].sub2:vreg_128 = COPY [[COPY70]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY71:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY71:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY71]], [[S_MOV_B32_]], 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE1:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.9, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.9, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY72:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub0 {
    ; CHECK-NEXT:   internal [[COPY72]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE1]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY73:%[0-9]+]].sub0:vreg_128 = COPY [[COPY72]].sub0 {
    ; CHECK-NEXT:   internal [[COPY73]].sub2:vreg_128 = COPY [[COPY72]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY73:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY73:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY73]], [[S_MOV_B32_]], 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE2:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.11, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.11, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY74:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub0 {
    ; CHECK-NEXT:   internal [[COPY74]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE2]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY75:%[0-9]+]].sub0:vreg_128 = COPY [[COPY74]].sub0 {
    ; CHECK-NEXT:   internal [[COPY75]].sub2:vreg_128 = COPY [[COPY74]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY75:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY75:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY75]], [[S_MOV_B32_]], 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE3:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.8, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.8, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY76:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub0 {
    ; CHECK-NEXT:   internal [[COPY76]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE3]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY77:%[0-9]+]].sub0:vreg_128 = COPY [[COPY76]].sub0 {
    ; CHECK-NEXT:   internal [[COPY77]].sub2:vreg_128 = COPY [[COPY76]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY77:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY77:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY77]], [[S_MOV_B32_]], 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: undef [[COPY78:%[0-9]+]].sub0:vreg_128 = COPY [[COPY21]].sub0 {
    ; CHECK-NEXT:   internal [[COPY78]].sub2:vreg_128 = COPY [[COPY21]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY79:%[0-9]+]].sub0:vreg_128 = COPY [[COPY78]].sub0 {
    ; CHECK-NEXT:   internal [[COPY79]].sub2:vreg_128 = COPY [[COPY78]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY79:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY79:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY79]], [[S_MOV_B32_]], 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
    ; CHECK-NEXT: undef [[COPY80:%[0-9]+]].sub0:vreg_128 = COPY [[COPY19]].sub0 {
    ; CHECK-NEXT:   internal [[COPY80]].sub2:vreg_128 = COPY [[COPY19]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY81:%[0-9]+]].sub0:vreg_128 = COPY [[COPY80]].sub0 {
    ; CHECK-NEXT:   internal [[COPY81]].sub2:vreg_128 = COPY [[COPY80]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY81:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY81:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY81]], [[S_MOV_B32_]], 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE4:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.6, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY82:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub0 {
    ; CHECK-NEXT:   internal [[COPY82]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE4]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY83:%[0-9]+]].sub0:vreg_128 = COPY [[COPY82]].sub0 {
    ; CHECK-NEXT:   internal [[COPY83]].sub2:vreg_128 = COPY [[COPY82]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY83:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY83:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY83]], [[S_MOV_B32_]], 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE5:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.7, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.7, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY84:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub0 {
    ; CHECK-NEXT:   internal [[COPY84]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE5]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY85:%[0-9]+]].sub0:vreg_128 = COPY [[COPY84]].sub0 {
    ; CHECK-NEXT:   internal [[COPY85]].sub2:vreg_128 = COPY [[COPY84]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY85:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY85:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY85]], [[S_MOV_B32_]], 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE6:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.5, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY86:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub0 {
    ; CHECK-NEXT:   internal [[COPY86]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE6]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY87:%[0-9]+]].sub0:vreg_128 = COPY [[COPY86]].sub0 {
    ; CHECK-NEXT:   internal [[COPY87]].sub2:vreg_128 = COPY [[COPY86]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY87:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY87:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY87]], [[S_MOV_B32_]], 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE7:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.4, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.4, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY88:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub0 {
    ; CHECK-NEXT:   internal [[COPY88]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE7]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY89:%[0-9]+]].sub0:vreg_128 = COPY [[COPY88]].sub0 {
    ; CHECK-NEXT:   internal [[COPY89]].sub2:vreg_128 = COPY [[COPY88]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY89:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY89:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY89]], [[S_MOV_B32_]], 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE8:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.3, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY90:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub0 {
    ; CHECK-NEXT:   internal [[COPY90]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE8]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY91:%[0-9]+]].sub0:vreg_128 = COPY [[COPY90]].sub0 {
    ; CHECK-NEXT:   internal [[COPY91]].sub2:vreg_128 = COPY [[COPY90]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY91:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY91:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY91]], [[S_MOV_B32_]], 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE9:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.2, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.2, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY92:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub0 {
    ; CHECK-NEXT:   internal [[COPY92]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE9]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY93:%[0-9]+]].sub0:vreg_128 = COPY [[COPY92]].sub0 {
    ; CHECK-NEXT:   internal [[COPY93]].sub2:vreg_128 = COPY [[COPY92]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY93:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY93:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY93]], [[S_MOV_B32_]], 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE10:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.1, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY94:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub0 {
    ; CHECK-NEXT:   internal [[COPY94]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE10]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY95:%[0-9]+]].sub0:vreg_128 = COPY [[COPY94]].sub0 {
    ; CHECK-NEXT:   internal [[COPY95]].sub2:vreg_128 = COPY [[COPY94]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY95:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY95:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY95]], [[S_MOV_B32_]], 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1)
    ; CHECK-NEXT: [[SI_SPILL_V128_RESTORE11:%[0-9]+]]:vreg_128 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
    ; CHECK-NEXT: undef [[COPY96:%[0-9]+]].sub0:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub0 {
    ; CHECK-NEXT:   internal [[COPY96]].sub2:vreg_128 = COPY [[SI_SPILL_V128_RESTORE11]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: undef [[COPY97:%[0-9]+]].sub0:vreg_128 = COPY [[COPY96]].sub0 {
    ; CHECK-NEXT:   internal [[COPY97]].sub2:vreg_128 = COPY [[COPY96]].sub2
    ; CHECK-NEXT: }
    ; CHECK-NEXT: [[COPY97:%[0-9]+]].sub1:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: [[COPY97:%[0-9]+]].sub3:vreg_128 = COPY [[V_LSHRREV_B32_e32_26]].sub1
    ; CHECK-NEXT: BUFFER_STORE_DWORDX4_OFFSET [[COPY97]], [[S_MOV_B32_]], 0, 16, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    ; CHECK-NEXT: S_ENDPGM 0
    %0:sgpr_64(p4) = COPY $sgpr0_sgpr1
    %1:sgpr_128 = S_LOAD_DWORDX4_IMM %0(p4), 9, 0 :: (dereferenceable invariant load (s128), align 4, addrspace 4)
    undef %2.sub3:sgpr_128 = S_MOV_B32 61440
    %2.sub2:sgpr_128 = S_MOV_B32 -1
    %2.sub0:sgpr_128 = COPY %1.sub0
    %2.sub1:sgpr_128 = COPY %1.sub1
    undef %3.sub0:sgpr_128 = COPY %1.sub2
    %3.sub1:sgpr_128 = COPY %1.sub3
    %3.sub2:sgpr_128 = COPY %2.sub2
    %3.sub3:sgpr_128 = COPY %2.sub3
    early-clobber %4:vreg_128, early-clobber %5:vreg_128, early-clobber %6:vreg_128, early-clobber %7:vreg_128 = BUNDLE %3, implicit $exec {
      %7:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 0, 0, 0, implicit $exec :: (load (s128), align 128, addrspace 1)
      %5:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 16, 0, 0, implicit $exec :: (load (s128), addrspace 1)
      %4:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 32, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
      %6:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 48, 0, 0, implicit $exec :: (load (s128), addrspace 1)
    }
    undef %8.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub1, implicit $exec
    undef %9.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub0, implicit $exec
    undef %10.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub3, implicit $exec
    undef %11.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %7.sub2, implicit $exec
    undef %12.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub1, implicit $exec
    undef %13.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub0, implicit $exec
    undef %14.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub3, implicit $exec
    undef %15.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %5.sub2, implicit $exec
    undef %16.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub1, implicit $exec
    undef %17.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub0, implicit $exec
    undef %18.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub3, implicit $exec
    undef %19.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %4.sub2, implicit $exec
    undef %20.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub1, implicit $exec
    undef %21.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub0, implicit $exec
    undef %22.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub3, implicit $exec
    undef %23.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %6.sub2, implicit $exec
    %24:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 64, 0, 0, implicit $exec :: (load (s128), align 64, addrspace 1)
    undef %25.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub1, implicit $exec
    undef %26.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub0, implicit $exec
    undef %27.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub3, implicit $exec
    undef %28.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %24.sub2, implicit $exec
    %29:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 80, 0, 0, implicit $exec :: (load (s128), addrspace 1)
    undef %30.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub1, implicit $exec
    undef %31.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub0, implicit $exec
    undef %32.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub3, implicit $exec
    undef %33.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %29.sub2, implicit $exec
    %34:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 96, 0, 0, implicit $exec :: (load (s128), align 32, addrspace 1)
    undef %35.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub1, implicit $exec
    undef %36.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub0, implicit $exec
    undef %37.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub3, implicit $exec
    undef %38.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %34.sub2, implicit $exec
    %39:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %3, 0, 112, 0, 0, implicit $exec :: (load (s128), addrspace 1)
    undef %40.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub1, implicit $exec
    undef %41.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub0, implicit $exec
    undef %42.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub3, implicit $exec
    undef %43.sub2:vreg_128 = V_LSHRREV_B32_e32 16, %39.sub2, implicit $exec
    %44:sreg_32 = S_MOV_B32 65535
    %8.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub1, implicit $exec
    %9.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub0, implicit $exec
    %10.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub3, implicit $exec
    %11.sub0:vreg_128 = V_AND_B32_e32 %44, %7.sub2, implicit $exec
    %12.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub1, implicit $exec
    %13.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub0, implicit $exec
    %14.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub3, implicit $exec
    %15.sub0:vreg_128 = V_AND_B32_e32 %44, %5.sub2, implicit $exec
    %16.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub1, implicit $exec
    %17.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub0, implicit $exec
    %18.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub3, implicit $exec
    %19.sub0:vreg_128 = V_AND_B32_e32 %44, %4.sub2, implicit $exec
    %20.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub1, implicit $exec
    %21.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub0, implicit $exec
    %22.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub3, implicit $exec
    %23.sub0:vreg_128 = V_AND_B32_e32 %44, %6.sub2, implicit $exec
    %25.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub1, implicit $exec
    %26.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub0, implicit $exec
    %27.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub3, implicit $exec
    %28.sub0:vreg_128 = V_AND_B32_e32 %44, %24.sub2, implicit $exec
    %30.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub1, implicit $exec
    %31.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub0, implicit $exec
    %32.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub3, implicit $exec
    %33.sub0:vreg_128 = V_AND_B32_e32 %44, %29.sub2, implicit $exec
    %35.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub1, implicit $exec
    %36.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub0, implicit $exec
    %37.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub3, implicit $exec
    %38.sub0:vreg_128 = V_AND_B32_e32 %44, %34.sub2, implicit $exec
    %40.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub1, implicit $exec
    %41.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub0, implicit $exec
    %42.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub3, implicit $exec
    %43.sub0:vreg_128 = V_AND_B32_e32 %44, %39.sub2, implicit $exec
    %43.sub1:vreg_128 = V_MOV_B32_e32 0, implicit $exec
    %43.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %43, %2, 0, 480, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %42.sub1:vreg_128 = COPY %43.sub1
    %42.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %42, %2, 0, 496, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %41.sub1:vreg_128 = COPY %43.sub1
    %41.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %41, %2, 0, 448, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    %40.sub1:vreg_128 = COPY %43.sub1
    %40.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %40, %2, 0, 464, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %38.sub1:vreg_128 = COPY %43.sub1
    %38.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %38, %2, 0, 416, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %37.sub1:vreg_128 = COPY %43.sub1
    %37.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %37, %2, 0, 432, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %36.sub1:vreg_128 = COPY %43.sub1
    %36.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %36, %2, 0, 384, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
    %35.sub1:vreg_128 = COPY %43.sub1
    %35.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %35, %2, 0, 400, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %33.sub1:vreg_128 = COPY %43.sub1
    %33.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %33, %2, 0, 352, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %32.sub1:vreg_128 = COPY %43.sub1
    %32.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %32, %2, 0, 368, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %31.sub1:vreg_128 = COPY %43.sub1
    %31.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %31, %2, 0, 320, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    %30.sub1:vreg_128 = COPY %43.sub1
    %30.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %30, %2, 0, 336, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %28.sub1:vreg_128 = COPY %43.sub1
    %28.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %28, %2, 0, 288, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %27.sub1:vreg_128 = COPY %43.sub1
    %27.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %27, %2, 0, 304, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %26.sub1:vreg_128 = COPY %43.sub1
    %26.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %26, %2, 0, 256, 0, 0, implicit $exec :: (store (s128), align 256, addrspace 1)
    %25.sub1:vreg_128 = COPY %43.sub1
    %25.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %25, %2, 0, 272, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %23.sub1:vreg_128 = COPY %43.sub1
    %23.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %23, %2, 0, 224, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %22.sub1:vreg_128 = COPY %43.sub1
    %22.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %22, %2, 0, 240, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %21.sub1:vreg_128 = COPY %43.sub1
    %21.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %21, %2, 0, 192, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    %20.sub1:vreg_128 = COPY %43.sub1
    %20.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %20, %2, 0, 208, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %19.sub1:vreg_128 = COPY %43.sub1
    %19.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %19, %2, 0, 160, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %18.sub1:vreg_128 = COPY %43.sub1
    %18.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %18, %2, 0, 176, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %17.sub1:vreg_128 = COPY %43.sub1
    %17.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %17, %2, 0, 128, 0, 0, implicit $exec :: (store (s128), align 128, addrspace 1)
    %16.sub1:vreg_128 = COPY %43.sub1
    %16.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %16, %2, 0, 144, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %15.sub1:vreg_128 = COPY %43.sub1
    %15.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %15, %2, 0, 96, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %14.sub1:vreg_128 = COPY %43.sub1
    %14.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %14, %2, 0, 112, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %13.sub1:vreg_128 = COPY %43.sub1
    %13.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %13, %2, 0, 64, 0, 0, implicit $exec :: (store (s128), align 64, addrspace 1)
    %12.sub1:vreg_128 = COPY %43.sub1
    %12.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %12, %2, 0, 80, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %11.sub1:vreg_128 = COPY %43.sub1
    %11.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %11, %2, 0, 32, 0, 0, implicit $exec :: (store (s128), align 32, addrspace 1)
    %10.sub1:vreg_128 = COPY %43.sub1
    %10.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %10, %2, 0, 48, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    %9.sub1:vreg_128 = COPY %43.sub1
    %9.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %9, %2, 0, 0, 0, 0, implicit $exec :: (store (s128), align 512, addrspace 1)
    %8.sub1:vreg_128 = COPY %43.sub1
    %8.sub3:vreg_128 = COPY %43.sub1
    BUFFER_STORE_DWORDX4_OFFSET %8, %2, 0, 16, 0, 0, implicit $exec :: (store (s128), addrspace 1)
    S_ENDPGM 0
...