xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2c.mir (revision 00a4e248dc65d3a60fd900b342d4ba410bf70af0)
1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
3
4--- |
5  define amdgpu_kernel void @single-wave-phase-2c(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17) #0 { ret void }
6
7
8  !0 = distinct !{!0}
9  !1 = !{!1, !0}
10...
11
12---
13name:            single-wave-phase-2c
14tracksRegLiveness: true
15machineFunctionInfo:
16  occupancy:       1
17body:      |
18  ; GCN-LABEL: name: single-wave-phase-2c
19  ; GCN: bb.0:
20  ; GCN-NEXT:   successors: %bb.1(0x80000000)
21  ; GCN-NEXT: {{  $}}
22  ; GCN-NEXT:   [[DEF:%[0-9]+]]:av_512_align2 = IMPLICIT_DEF
23  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:av_512_align2 = IMPLICIT_DEF
24  ; GCN-NEXT:   [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
25  ; GCN-NEXT:   [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
26  ; GCN-NEXT:   [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
27  ; GCN-NEXT:   [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
28  ; GCN-NEXT:   [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
29  ; GCN-NEXT:   [[DEF7:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
30  ; GCN-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
31  ; GCN-NEXT:   [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
32  ; GCN-NEXT:   [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
33  ; GCN-NEXT:   [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
34  ; GCN-NEXT:   [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
35  ; GCN-NEXT:   [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
36  ; GCN-NEXT:   [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
37  ; GCN-NEXT:   [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
38  ; GCN-NEXT:   [[DEF16:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
39  ; GCN-NEXT:   [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
40  ; GCN-NEXT:   [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
41  ; GCN-NEXT:   [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
42  ; GCN-NEXT:   dead [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
43  ; GCN-NEXT:   [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
44  ; GCN-NEXT:   [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
45  ; GCN-NEXT:   [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
46  ; GCN-NEXT:   [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
47  ; GCN-NEXT:   [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
48  ; GCN-NEXT:   [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
49  ; GCN-NEXT:   [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
50  ; GCN-NEXT:   [[DEF28:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
51  ; GCN-NEXT:   [[DEF29:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
52  ; GCN-NEXT:   [[DEF30:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
53  ; GCN-NEXT:   [[DEF31:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
54  ; GCN-NEXT:   [[DEF32:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
55  ; GCN-NEXT:   [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
56  ; GCN-NEXT: {{  $}}
57  ; GCN-NEXT: bb.1:
58  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
59  ; GCN-NEXT: {{  $}}
60  ; GCN-NEXT:   [[DS_READ_B128_gfx9_:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
61  ; GCN-NEXT:   [[DS_READ_B128_gfx9_1:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
62  ; GCN-NEXT:   [[DS_READ_B128_gfx9_2:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
63  ; GCN-NEXT:   [[DS_READ_B128_gfx9_3:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
64  ; GCN-NEXT:   [[COPY:%[0-9]+]]:areg_512_align2 = COPY [[DEF1]]
65  ; GCN-NEXT:   [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_1]].sub0_sub1, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
66  ; GCN-NEXT:   [[DS_READ_B128_gfx9_4:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
67  ; GCN-NEXT:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF33]], implicit $exec
68  ; GCN-NEXT:   [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF21]], implicit $exec
69  ; GCN-NEXT:   [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_1]].sub2_sub3, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
70  ; GCN-NEXT:   [[DS_READ_B128_gfx9_5:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
71  ; GCN-NEXT:   [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF22]], implicit $exec
72  ; GCN-NEXT:   [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF23]], implicit $exec
73  ; GCN-NEXT:   [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_3]].sub0_sub1, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
74  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF4]], [[DEF16]], 0, 0, implicit $exec :: (store (s128) into %ir.in6, !alias.scope !0, addrspace 3)
75  ; GCN-NEXT:   [[DEF16:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF6]], [[DEF7]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
76  ; GCN-NEXT:   dead [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_3]].sub2_sub3, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
77  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:areg_512_align2 = COPY [[DEF]]
78  ; GCN-NEXT:   undef [[DEF17:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF13]], [[DEF12]], [[DEF30]], implicit $exec
79  ; GCN-NEXT:   [[DEF17:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF15]], [[DEF14]], [[DEF30]], implicit $exec
80  ; GCN-NEXT:   [[DEF17:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF8]], [[DEF9]], [[DEF30]], implicit $exec
81  ; GCN-NEXT:   [[DEF17:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF11]], [[DEF10]], [[DEF30]], implicit $exec
82  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF5]], [[DEF17]], 0, 0, implicit $exec :: (store (s128) into %ir.in8, !alias.scope !0, addrspace 3)
83  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_4]].sub0_sub1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
84  ; GCN-NEXT:   undef [[DEF18:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF8]], [[DEF9]], [[DEF31]], implicit $exec
85  ; GCN-NEXT:   [[DEF18:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF11]], [[DEF10]], [[DEF31]], implicit $exec
86  ; GCN-NEXT:   [[DEF18:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF13]], [[DEF12]], [[DEF31]], implicit $exec
87  ; GCN-NEXT:   [[DEF18:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF15]], [[DEF14]], [[DEF31]], implicit $exec
88  ; GCN-NEXT:   DS_WRITE_B128_gfx9 [[DEF5]], [[DEF18]], 16, 0, implicit $exec :: (store (s128) into %ir.in9, !alias.scope !0, addrspace 3)
89  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_4]].sub2_sub3, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
90  ; GCN-NEXT:   [[DEF9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 7)
91  ; GCN-NEXT:   [[DEF8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_1]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 7)
92  ; GCN-NEXT:   [[DEF10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_2]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in12, !alias.scope !0, addrspace 7)
93  ; GCN-NEXT:   [[DEF11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_3]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in13, !alias.scope !0, addrspace 7)
94  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_5]].sub0_sub1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
95  ; GCN-NEXT:   [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF24]], implicit $exec
96  ; GCN-NEXT:   [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF25]], implicit $exec
97  ; GCN-NEXT:   [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF26]], implicit $exec
98  ; GCN-NEXT:   [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF27]], implicit $exec
99  ; GCN-NEXT:   [[DEF12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_4]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in14, !alias.scope !0, addrspace 7)
100  ; GCN-NEXT:   [[DEF13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_5]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in15, !alias.scope !0, addrspace 7)
101  ; GCN-NEXT:   [[DEF14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_6]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in16, !alias.scope !0, addrspace 7)
102  ; GCN-NEXT:   [[DEF15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_7]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in17, !alias.scope !0, addrspace 7)
103  ; GCN-NEXT:   dead [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_5]].sub2_sub3, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
104  ; GCN-NEXT:   IGLP_OPT 1
105  ; GCN-NEXT:   [[DEF29:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[DEF29]], -1, implicit-def dead $scc
106  ; GCN-NEXT:   S_CMP_LG_U32 [[DEF29]], 0, implicit-def $scc
107  ; GCN-NEXT:   [[DEF21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF21]], implicit $exec
108  ; GCN-NEXT:   [[DEF33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF33]], implicit $exec
109  ; GCN-NEXT:   [[DEF23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF23]], implicit $exec
110  ; GCN-NEXT:   [[DEF22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF22]], implicit $exec
111  ; GCN-NEXT:   [[DEF6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 64, [[DEF6]], implicit $exec
112  ; GCN-NEXT:   [[DEF27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF27]], implicit $exec
113  ; GCN-NEXT:   [[DEF26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF26]], implicit $exec
114  ; GCN-NEXT:   [[DEF25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF25]], implicit $exec
115  ; GCN-NEXT:   [[DEF24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF24]], implicit $exec
116  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit killed $scc
117  ; GCN-NEXT: {{  $}}
118  ; GCN-NEXT: bb.2:
119  ; GCN-NEXT:   S_ENDPGM 0
120  bb.0:
121  %1076:av_512_align2 = IMPLICIT_DEF
122  %1073:av_512_align2 = IMPLICIT_DEF
123  %25:vgpr_32 = IMPLICIT_DEF
124  %26:vgpr_32 = IMPLICIT_DEF
125  %13:vgpr_32 = IMPLICIT_DEF
126  %15:vgpr_32 = IMPLICIT_DEF
127  %1215:vgpr_32 = IMPLICIT_DEF
128  %381:sgpr_128 = IMPLICIT_DEF
129  %1225:vgpr_32 = IMPLICIT_DEF
130  %1224:vgpr_32 = IMPLICIT_DEF
131  %1226:vgpr_32 = IMPLICIT_DEF
132  %1227:vgpr_32 = IMPLICIT_DEF
133  %1228:vgpr_32 = IMPLICIT_DEF
134  %1229:vgpr_32 = IMPLICIT_DEF
135  %1230:vgpr_32 = IMPLICIT_DEF
136  %1231:vgpr_32 = IMPLICIT_DEF
137  %1232:av_128_align2 = IMPLICIT_DEF
138  %1091:vreg_128_align2 = IMPLICIT_DEF
139  %1067:vreg_128_align2 = IMPLICIT_DEF
140  %27:vgpr_32 = IMPLICIT_DEF
141  %1216:vgpr_32 = IMPLICIT_DEF
142  %1217:vgpr_32 = IMPLICIT_DEF
143  %1218:vgpr_32 = IMPLICIT_DEF
144  %1219:vgpr_32 = IMPLICIT_DEF
145  %1220:vgpr_32 = IMPLICIT_DEF
146  %1221:vgpr_32 = IMPLICIT_DEF
147  %1222:vgpr_32 = IMPLICIT_DEF
148  %1223:vgpr_32 = IMPLICIT_DEF
149  %29:sreg_32 = IMPLICIT_DEF
150  %1214:sreg_32 = IMPLICIT_DEF
151  %419:sreg_32 = IMPLICIT_DEF
152  %421:sreg_32 = IMPLICIT_DEF
153  %387:sgpr_128 = IMPLICIT_DEF
154  %1216:vgpr_32 = IMPLICIT_DEF
155
156  bb.1:
157  IGLP_OPT 1
158  %489:av_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
159  %494:av_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
160  %499:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
161  %504:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
162  %527:areg_512_align2 = COPY %1073:av_512_align2
163  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub0_sub1:av_128_align2, %499.sub0_sub1:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
164  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub2_sub3:av_128_align2, %499.sub2_sub3:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
165  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub0_sub1:av_128_align2, %504.sub0_sub1:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
166  %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub2_sub3:av_128_align2, %504.sub2_sub3:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
167  %530:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
168  %535:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
169  %554:areg_512_align2 = COPY %1076:av_512_align2
170  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub0_sub1:av_128_align2, %530.sub0_sub1:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
171  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub2_sub3:av_128_align2, %530.sub2_sub3:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
172  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub0_sub1:av_128_align2, %535.sub0_sub1:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
173  %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub2_sub3:av_128_align2, %535.sub2_sub3:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
174  DS_WRITE_B128_gfx9 %13:vgpr_32, %1232:av_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in6, !alias.scope !0, addrspace 3)
175  %1232:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %1215:vgpr_32, %381:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
176  %1091.sub0:vreg_128_align2 = V_PERM_B32_e64 %1225:vgpr_32, %1224:vgpr_32, %419:sreg_32, implicit $exec
177  %1067.sub0:vreg_128_align2 = V_PERM_B32_e64 %1225:vgpr_32, %1224:vgpr_32, %421:sreg_32, implicit $exec
178  %1091.sub1:vreg_128_align2 = V_PERM_B32_e64 %1227:vgpr_32, %1226:vgpr_32, %419:sreg_32, implicit $exec
179  %1067.sub1:vreg_128_align2 = V_PERM_B32_e64 %1227:vgpr_32, %1226:vgpr_32, %421:sreg_32, implicit $exec
180  %1091.sub2:vreg_128_align2 = V_PERM_B32_e64 %1229:vgpr_32, %1228:vgpr_32, %419:sreg_32, implicit $exec
181  %1067.sub2:vreg_128_align2 = V_PERM_B32_e64 %1229:vgpr_32, %1228:vgpr_32, %421:sreg_32, implicit $exec
182  %1091.sub3:vreg_128_align2 = V_PERM_B32_e64 %1231:vgpr_32, %1230:vgpr_32, %419:sreg_32, implicit $exec
183  %1067.sub3:vreg_128_align2 = V_PERM_B32_e64 %1231:vgpr_32, %1230:vgpr_32, %421:sreg_32, implicit $exec
184  DS_WRITE_B128_gfx9 %15:vgpr_32, %1091:vreg_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in8, !alias.scope !0, addrspace 3)
185  DS_WRITE_B128_gfx9 %15:vgpr_32, %1067:vreg_128_align2, 16, 0, implicit $exec :: (store (s128) into %ir.in9, !alias.scope !0, addrspace 3)
186  %572:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1216:vgpr_32, implicit $exec
187  %1224:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %572:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 7)
188  %573:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1217:vgpr_32, implicit $exec
189  %1225:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %573:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 7)
190  %574:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1218:vgpr_32, implicit $exec
191  %1226:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %574:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in12, !alias.scope !0, addrspace 7)
192  %575:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1219:vgpr_32, implicit $exec
193  %1227:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %575:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in13, !alias.scope !0, addrspace 7)
194  %576:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1220:vgpr_32, implicit $exec
195  %1228:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %576:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in14, !alias.scope !0, addrspace 7)
196  %577:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1221:vgpr_32, implicit $exec
197  %1229:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %577:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in15, !alias.scope !0, addrspace 7)
198  %578:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1222:vgpr_32, implicit $exec
199  %1230:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %578:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in16, !alias.scope !0, addrspace 7)
200  %579:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1223:vgpr_32, implicit $exec
201  %1231:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %579:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in17, !alias.scope !0, addrspace 7)
202  %1223:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1223:vgpr_32, implicit $exec
203  %1222:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1222:vgpr_32, implicit $exec
204  %1221:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1221:vgpr_32, implicit $exec
205  %1220:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1220:vgpr_32, implicit $exec
206  %1219:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1219:vgpr_32, implicit $exec
207  %1218:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1218:vgpr_32, implicit $exec
208  %1217:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1217:vgpr_32, implicit $exec
209  %1216:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1216:vgpr_32, implicit $exec
210  %1215:vgpr_32 = V_ADD_U32_e32 64, %1215:vgpr_32, implicit $exec
211  %1214:sreg_32 = nsw S_ADD_I32 %1214:sreg_32, -1, implicit-def dead $scc
212  S_CMP_LG_U32 %1214:sreg_32, 0, implicit-def $scc
213  S_CBRANCH_SCC1 %bb.1, implicit killed $scc
214
215  bb.2:
216  S_ENDPGM 0
217---
218