# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
#
# Machine-scheduler test for a loop block (bb.1) that carries an IGLP_OPT 1
# marker. The input lists the DS reads, two four-deep MFMA accumulation chains,
# the DS writes and the buffer loads in separate groups; the autogenerated
# assertions record the instruction order that the llc invocation above
# produces for that block on gfx90a.
#
# Review note: IGLP_OPT 1 presumably selects the single-wave small-GEMM
# strategy (the function name suggests "single wave, phase 2c") -- confirm
# against the AMDGPU IGroupLP implementation before relying on this.
#
# The assertions are generated; regenerate them with the script named on the
# first line instead of editing them by hand.

--- |
  ; Stub kernel: its only role is to declare the %ir.in0 .. %ir.in17 values that
  ; the machine memory operands below refer to. %in0-%in11 are addrspace(3)
  ; pointers and %in12-%in17 are addrspace(7) pointers, all marked noalias.
  define amdgpu_kernel void @single-wave-phase-2c(ptr addrspace(3) noalias %in0, ptr addrspace(3) noalias %in1, ptr addrspace(3) noalias %in2, ptr addrspace(3) noalias %in3, ptr addrspace(3) noalias %in4, ptr addrspace(3) noalias %in5, ptr addrspace(3) noalias %in6, ptr addrspace(3) noalias %in7, ptr addrspace(3) noalias %in8, ptr addrspace(3) noalias %in9, ptr addrspace(3) noalias %in10, ptr addrspace(3) noalias %in11, ptr addrspace(7) noalias %in12, ptr addrspace(7) noalias %in13, ptr addrspace(7) noalias %in14, ptr addrspace(7) noalias %in15, ptr addrspace(7) noalias %in16, ptr addrspace(7) noalias %in17) #0 { ret void }


  ; !0 is the scope attached to every memory operand via !alias.scope.
  !0 = distinct !{!0}
  !1 = !{!1, !0}
...

---
# Machine function under test. bb.0 only creates IMPLICIT_DEF values, bb.1 is
# the self-looping block that gets scheduled, bb.2 ends the program.
name: single-wave-phase-2c
tracksRegLiveness: true
machineFunctionInfo:
  occupancy: 1
body: |
  ; Expected scheduler output (autogenerated). In bb.1 the MFMAs end up
  ; interleaved with the DS reads/writes, V_PERM_B32s, address adds and buffer
  ; loads, and IGLP_OPT 1 follows the last MFMA.
  ; GCN-LABEL: name: single-wave-phase-2c
  ; GCN: bb.0:
  ; GCN-NEXT: successors: %bb.1(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[DEF:%[0-9]+]]:av_512_align2 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF1:%[0-9]+]]:av_512_align2 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF7:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF16:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: dead [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF28:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF29:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF30:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF31:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF32:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
  ; GCN-NEXT: [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[DS_READ_B128_gfx9_2:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[DS_READ_B128_gfx9_3:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = COPY [[DEF1]]
  ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_1]].sub0_sub1, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: [[DS_READ_B128_gfx9_4:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF33]], implicit $exec
  ; GCN-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF21]], implicit $exec
  ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_1]].sub2_sub3, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: [[DS_READ_B128_gfx9_5:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF22]], implicit $exec
  ; GCN-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF23]], implicit $exec
  ; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_3]].sub0_sub1, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF4]], [[DEF16]], 0, 0, implicit $exec :: (store (s128) into %ir.in6, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[DEF16:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF6]], [[DEF7]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: dead [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_3]].sub2_sub3, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:areg_512_align2 = COPY [[DEF]]
  ; GCN-NEXT: undef [[DEF17:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF13]], [[DEF12]], [[DEF30]], implicit $exec
  ; GCN-NEXT: [[DEF17:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF15]], [[DEF14]], [[DEF30]], implicit $exec
  ; GCN-NEXT: [[DEF17:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF8]], [[DEF9]], [[DEF30]], implicit $exec
  ; GCN-NEXT: [[DEF17:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF11]], [[DEF10]], [[DEF30]], implicit $exec
  ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF5]], [[DEF17]], 0, 0, implicit $exec :: (store (s128) into %ir.in8, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_4]].sub0_sub1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: undef [[DEF18:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF8]], [[DEF9]], [[DEF31]], implicit $exec
  ; GCN-NEXT: [[DEF18:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF11]], [[DEF10]], [[DEF31]], implicit $exec
  ; GCN-NEXT: [[DEF18:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF13]], [[DEF12]], [[DEF31]], implicit $exec
  ; GCN-NEXT: [[DEF18:%[0-9]+]].sub3:vreg_128_align2 = V_PERM_B32_e64 [[DEF15]], [[DEF14]], [[DEF31]], implicit $exec
  ; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF5]], [[DEF18]], 16, 0, implicit $exec :: (store (s128) into %ir.in9, !alias.scope !0, addrspace 3)
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_4]].sub2_sub3, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_1]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_2]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in12, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_3]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in13, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_5]].sub0_sub1, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF24]], implicit $exec
  ; GCN-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF25]], implicit $exec
  ; GCN-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF26]], implicit $exec
  ; GCN-NEXT: [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF27]], implicit $exec
  ; GCN-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_4]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in14, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_5]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in15, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_6]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in16, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_ADD_U32_e32_7]], [[DEF32]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in17, !alias.scope !0, addrspace 7)
  ; GCN-NEXT: dead [[COPY1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_5]].sub2_sub3, [[COPY1]], 0, 0, 0, implicit $mode, implicit $exec
  ; GCN-NEXT: IGLP_OPT 1
  ; GCN-NEXT: [[DEF29:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[DEF29]], -1, implicit-def dead $scc
  ; GCN-NEXT: S_CMP_LG_U32 [[DEF29]], 0, implicit-def $scc
  ; GCN-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF21]], implicit $exec
  ; GCN-NEXT: [[DEF33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF33]], implicit $exec
  ; GCN-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF23]], implicit $exec
  ; GCN-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF22]], implicit $exec
  ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 64, [[DEF6]], implicit $exec
  ; GCN-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF27]], implicit $exec
  ; GCN-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF26]], implicit $exec
  ; GCN-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF25]], implicit $exec
  ; GCN-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF28]], [[DEF24]], implicit $exec
  ; GCN-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT: S_ENDPGM 0
  ; Input. bb.0 gives every register used in the loop an IMPLICIT_DEF so that
  ; bb.1 can be scheduled in isolation.
  bb.0:
    %1076:av_512_align2 = IMPLICIT_DEF
    %1073:av_512_align2 = IMPLICIT_DEF
    %25:vgpr_32 = IMPLICIT_DEF
    %26:vgpr_32 = IMPLICIT_DEF
    %13:vgpr_32 = IMPLICIT_DEF
    %15:vgpr_32 = IMPLICIT_DEF
    %1215:vgpr_32 = IMPLICIT_DEF
    %381:sgpr_128 = IMPLICIT_DEF
    %1225:vgpr_32 = IMPLICIT_DEF
    %1224:vgpr_32 = IMPLICIT_DEF
    %1226:vgpr_32 = IMPLICIT_DEF
    %1227:vgpr_32 = IMPLICIT_DEF
    %1228:vgpr_32 = IMPLICIT_DEF
    %1229:vgpr_32 = IMPLICIT_DEF
    %1230:vgpr_32 = IMPLICIT_DEF
    %1231:vgpr_32 = IMPLICIT_DEF
    %1232:av_128_align2 = IMPLICIT_DEF
    %1091:vreg_128_align2 = IMPLICIT_DEF
    %1067:vreg_128_align2 = IMPLICIT_DEF
    %27:vgpr_32 = IMPLICIT_DEF
    ; Review note: %1216 is defined here and again at the end of this block; this
    ; first definition is the one reported as `dead` in the expected output.
    ; Removing it would renumber the captured DEF variables, so regenerate the
    ; assertions if it is ever cleaned up.
    %1216:vgpr_32 = IMPLICIT_DEF
    %1217:vgpr_32 = IMPLICIT_DEF
    %1218:vgpr_32 = IMPLICIT_DEF
    %1219:vgpr_32 = IMPLICIT_DEF
    %1220:vgpr_32 = IMPLICIT_DEF
    %1221:vgpr_32 = IMPLICIT_DEF
    %1222:vgpr_32 = IMPLICIT_DEF
    %1223:vgpr_32 = IMPLICIT_DEF
    %29:sreg_32 = IMPLICIT_DEF
    %1214:sreg_32 = IMPLICIT_DEF
    %419:sreg_32 = IMPLICIT_DEF
    %421:sreg_32 = IMPLICIT_DEF
    %387:sgpr_128 = IMPLICIT_DEF
    %1216:vgpr_32 = IMPLICIT_DEF

  bb.1:
    ; The marker is the first instruction of the input block; in the expected
    ; output it sits after the last MFMA.
    IGLP_OPT 1
    ; Four 128-bit DS reads (addrspace 3) that feed the first MFMA chain.
    %489:av_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
    %494:av_128_align2 = DS_READ_B128_gfx9 %25:vgpr_32, 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
    %499:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
    %504:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
    ; First accumulation chain: %527 is seeded from %1073 and then updated in
    ; place by four MFMAs, each consuming one 64-bit half of two read results.
    %527:areg_512_align2 = COPY %1073:av_512_align2
    %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub0_sub1:av_128_align2, %499.sub0_sub1:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub2_sub3:av_128_align2, %499.sub2_sub3:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub0_sub1:av_128_align2, %504.sub0_sub1:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    %527:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub2_sub3:av_128_align2, %504.sub2_sub3:av_128_align2, %527:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    ; Two more DS reads for the second chain, which reuses %489/%494 as its
    ; first operands and is seeded from %1076.
    %530:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
    %535:av_128_align2 = DS_READ_B128_gfx9 %26:vgpr_32, 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
    %554:areg_512_align2 = COPY %1076:av_512_align2
    %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub0_sub1:av_128_align2, %530.sub0_sub1:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %489.sub2_sub3:av_128_align2, %530.sub2_sub3:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub0_sub1:av_128_align2, %535.sub0_sub1:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    %554:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 %494.sub2_sub3:av_128_align2, %535.sub2_sub3:av_128_align2, %554:areg_512_align2, 0, 0, 0, implicit $mode, implicit $exec
    ; The DS write reads %1232 before the buffer load below redefines it.
    DS_WRITE_B128_gfx9 %13:vgpr_32, %1232:av_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in6, !alias.scope !0, addrspace 3)
    ; Review note: this memory operand says addrspace 7 although %in7 is declared
    ; addrspace(3) in the IR stub (same for %ir.in10 and %ir.in11 further down).
    ; Left as is because the assertions were generated from this exact input.
    %1232:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %1215:vgpr_32, %381:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
    ; Assemble %1091 (selector %419) and %1067 (selector %421) one 32-bit
    ; subregister at a time, then store both with DS writes.
    %1091.sub0:vreg_128_align2 = V_PERM_B32_e64 %1225:vgpr_32, %1224:vgpr_32, %419:sreg_32, implicit $exec
    %1067.sub0:vreg_128_align2 = V_PERM_B32_e64 %1225:vgpr_32, %1224:vgpr_32, %421:sreg_32, implicit $exec
    %1091.sub1:vreg_128_align2 = V_PERM_B32_e64 %1227:vgpr_32, %1226:vgpr_32, %419:sreg_32, implicit $exec
    %1067.sub1:vreg_128_align2 = V_PERM_B32_e64 %1227:vgpr_32, %1226:vgpr_32, %421:sreg_32, implicit $exec
    %1091.sub2:vreg_128_align2 = V_PERM_B32_e64 %1229:vgpr_32, %1228:vgpr_32, %419:sreg_32, implicit $exec
    %1067.sub2:vreg_128_align2 = V_PERM_B32_e64 %1229:vgpr_32, %1228:vgpr_32, %421:sreg_32, implicit $exec
    %1091.sub3:vreg_128_align2 = V_PERM_B32_e64 %1231:vgpr_32, %1230:vgpr_32, %419:sreg_32, implicit $exec
    %1067.sub3:vreg_128_align2 = V_PERM_B32_e64 %1231:vgpr_32, %1230:vgpr_32, %421:sreg_32, implicit $exec
    DS_WRITE_B128_gfx9 %15:vgpr_32, %1091:vreg_128_align2, 0, 0, implicit $exec :: (store (s128) into %ir.in8, !alias.scope !0, addrspace 3)
    DS_WRITE_B128_gfx9 %15:vgpr_32, %1067:vreg_128_align2, 16, 0, implicit $exec :: (store (s128) into %ir.in9, !alias.scope !0, addrspace 3)
    ; Eight add/load pairs: each add forms %27 + one of %1216..%1223, and the
    ; following buffer load redefines one of the V_PERM_B32 inputs
    ; (%1224..%1231) used above.
    ; Review note: the memory operands on these BUFFER_LOAD_DWORD_OFFEN
    ; instructions are written as s128; a dword load would normally be s32 --
    ; confirm whether that is intentional before changing it.
    %572:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1216:vgpr_32, implicit $exec
    %1224:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %572:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 7)
    %573:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1217:vgpr_32, implicit $exec
    %1225:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %573:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 7)
    %574:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1218:vgpr_32, implicit $exec
    %1226:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %574:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in12, !alias.scope !0, addrspace 7)
    %575:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1219:vgpr_32, implicit $exec
    %1227:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %575:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in13, !alias.scope !0, addrspace 7)
    %576:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1220:vgpr_32, implicit $exec
    %1228:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %576:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in14, !alias.scope !0, addrspace 7)
    %577:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1221:vgpr_32, implicit $exec
    %1229:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %577:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in15, !alias.scope !0, addrspace 7)
    %578:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1222:vgpr_32, implicit $exec
    %1230:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %578:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in16, !alias.scope !0, addrspace 7)
    %579:vgpr_32 = V_ADD_U32_e32 %27:vgpr_32, %1223:vgpr_32, implicit $exec
    %1231:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %579:vgpr_32, %387:sgpr_128, 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in17, !alias.scope !0, addrspace 7)
    ; Loop-carried updates: add %29 to each of the eight offsets, add 64 to
    ; %1215, decrement the counter %1214, compare it against 0 and branch back
    ; to bb.1 on SCC.
    %1223:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1223:vgpr_32, implicit $exec
    %1222:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1222:vgpr_32, implicit $exec
    %1221:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1221:vgpr_32, implicit $exec
    %1220:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1220:vgpr_32, implicit $exec
    %1219:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1219:vgpr_32, implicit $exec
    %1218:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1218:vgpr_32, implicit $exec
    %1217:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1217:vgpr_32, implicit $exec
    %1216:vgpr_32 = V_ADD_U32_e32 %29:sreg_32, %1216:vgpr_32, implicit $exec
    %1215:vgpr_32 = V_ADD_U32_e32 64, %1215:vgpr_32, implicit $exec
    %1214:sreg_32 = nsw S_ADD_I32 %1214:sreg_32, -1, implicit-def dead $scc
    S_CMP_LG_U32 %1214:sreg_32, 0, implicit-def $scc
    S_CBRANCH_SCC1 %bb.1, implicit killed $scc

  bb.2:
    S_ENDPGM 0
---