# RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX940 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s

# $vgpr0 and $vgpr1 are written by V_MOV_B32 and then read by the MFMA;
# S_NOP 1 is expected directly before the MFMA.
# GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read
# GCN:      V_MOV_B32
# GCN:      V_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            valu_write_vgpr_sgemm_mfma_read
body:             |
  bb.0:
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec
    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# $agpr4 is written by V_ACCVGPR_WRITE_B32 and then read as the first MFMA
# source; S_NOP 1 is expected.
# GCN-LABEL: name: valu_write_agpr_sgemm_mfma_read
# GCN:      V_ACCVGPR_WRITE_B32_e64
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            valu_write_agpr_sgemm_mfma_read
body:             |
  bb.0:
    $vgpr0 = IMPLICIT_DEF
    $agpr4 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# $vgpr0 and $vgpr1 are written by V_MOV_B32 and read as $vgpr0_vgpr1 by the
# F64 4x4 MFMA; S_NOP 1 is expected.
# GCN-LABEL: name: valu_write_vgpr_dgemm_mfma_read
# GCN:      V_MOV_B32
# GCN:      V_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            valu_write_vgpr_dgemm_mfma_read
body:             |
  bb.0:
    $vgpr0 = V_MOV_B32_e32 1, implicit $exec
    $vgpr1 = V_MOV_B32_e32 1, implicit $exec
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# $vgpr32 is written by V_MOV_B32 and then read by the SMFMAC; S_NOP 1 is
# expected.
# GCN-LABEL: name: valu_write_vgpr_smfmac_read
# GCN:      V_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_SMFMAC
name:            valu_write_vgpr_smfmac_read
body:             |
  bb.0:
    $vgpr32 = V_MOV_B32_e32 1, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# $agpr4 is written by V_ACCVGPR_MOV_B32 and then read as the first MFMA
# source; S_NOP 1 is expected.
# GCN-LABEL: name: accmov_write_agpr_sgemm_mfma_read
# GCN:      V_ACCVGPR_MOV_B32
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            accmov_write_agpr_sgemm_mfma_read
body:             |
  bb.0:
    $vgpr0 = IMPLICIT_DEF
    $agpr4 = V_ACCVGPR_MOV_B32 $agpr5, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Back-to-back V_MFMA_F32_4X4X1F32 where the second one takes the first one's
# result tuple ($agpr0..$agpr3) as its third source; S_NOP 1 is expected.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Same pattern with the vgprcd form on $vgpr0..$vgpr3; S_NOP 1 is expected.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Back-to-back V_MFMA_F32_16X16X1F32 on the identical $vgpr0..$vgpr15 tuple;
# no S_NOP is expected between the two.
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 writes $agpr0..$agpr3, which the following SMFMAC uses
# as its tied accumulator; S_NOP 1 is expected.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_smfmac_read_same_agpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_SMFMAC
name:            nonxdl_sgemm4x4_mfma_write_agpr_smfmac_read_same_agpr_as_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr4, $vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Back-to-back V_MFMA_F64_16X16X4F64 on the identical $vgpr0..$vgpr7 tuple;
# no S_NOP is expected between the two.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: V_MFMA
name:            dgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Back-to-back V_MFMA_F64_4X4X4F64 with $vgpr2_vgpr3 as both the result and
# the third source; S_NOP 3 is expected.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body:             |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Back-to-back V_MFMA_I32_4X4X4I8 on the identical $vgpr0..$vgpr3 tuple;
# S_NOP 1 is expected.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Back-to-back V_MFMA_I32_16X16X32I8 on the identical $agpr0..$agpr3 tuple;
# no S_NOP is expected between the two.
# NOTE(review): the test name says "vgpr" but the body operates on AGPRs.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Back-to-back V_SMFMAC_F32_16X16X32_F16 on the identical $agpr0..$agpr3
# tuple; no S_NOP is expected between the two.
# NOTE(review): the test name says "vgpr" but the accumulator is an AGPR tuple.
# GCN-LABEL: name: smfmac16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
# GCN:      V_SMFMAC
# GCN-NEXT: V_SMFMAC
name:            smfmac16x16_mfma_write_vgpr_mfma_read_same_vgpr_as_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# The first MFMA writes $agpr2..$agpr5, which partially overlaps the
# $agpr0..$agpr3 third source of the second V_MFMA_I32_4X4X4I8.
# Expected: S_NOP 2 on gfx940, S_NOP 3 on gfx950.
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_mfma_read_overlap
# GCN:      V_MFMA
# GFX940-NEXT: S_NOP 2
# GFX950-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name:            sgemm4x4_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $vgpr2..$vgpr5, which partially overlaps the
# $vgpr0..$vgpr3 third source of the following V_MFMA_F32_4X4X1F32.
# Expected: S_NOP 2 on both targets.
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name:            sgemm4x4_mfma_write_vgpr_mfma_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr6, $vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr6, $vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $agpr2..$agpr5, which partially overlaps the
# $agpr0..$agpr3 accumulator of the following SMFMAC.
# Expected: S_NOP 2 on gfx940, S_NOP 3 on gfx950.
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_smfmac_read_overlap
# GCN:      V_MFMA
# GFX940-NEXT: S_NOP 2
# GFX950-NEXT: S_NOP 3
# GCN-NEXT: V_SMFMAC
name:            sgemm4x4_mfma_write_agpr_smfmac_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $agpr2..$agpr17, which partially overlaps the
# $agpr0..$agpr15 third source of the following V_MFMA_F32_16X16X1F32.
# Expected: S_NOP 7 + S_NOP 0 on gfx940, S_NOP 7 + S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap
# GCN:      V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16x16_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $vgpr2..$vgpr17, which partially overlaps the
# $vgpr0..$vgpr15 third source of the following V_MFMA_I32_16X16X4I8.
# Expected: S_NOP 7 + S_NOP 0 on gfx940, S_NOP 7 + S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap
# GCN:      V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16x16_mfma_write_vgpr_mfma_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 writes $agpr2..$agpr17, which partially overlaps the
# $agpr0..$agpr15 third source of the following V_MFMA_I32_16X16X4I8.
# No S_NOP is expected between the two.
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_xdl_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm16x16_mfma_write_agpr_xdl_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 writes $agpr2..$agpr17, which partially overlaps the
# $agpr0..$agpr15 third source of the following V_MFMA_F32_16X16X1F32.
# Expected: S_NOP 7.
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_nonxdl_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm16x16_mfma_write_agpr_nonxdl_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $agpr2..$agpr17, which partially overlaps the
# $agpr0..$agpr3 accumulator of the following SMFMAC.
# Expected: S_NOP 7 + S_NOP 0 on gfx940, S_NOP 7 + S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
# GCN:      V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_SMFMAC
name:            xdl_sgemm16x16_mfma_write_agpr_smfmac_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X4F16 writes $agpr2..$agpr33, which partially overlaps the
# $agpr0..$agpr15 third source of the following V_MFMA_F32_32X32X2F32.
# Expected: two S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm32x32_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X4F16 writes $vgpr2..$vgpr33, which partially overlaps the
# $vgpr0..$vgpr15 third source of the following V_MFMA_F32_32X32X2F32.
# Expected: two S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm32x32_mfma_write_vgpr_mfma_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr126, $vgpr127, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X1F32 writes $agpr2..$agpr33, which partially overlaps the
# $agpr0..$agpr3 third source of the following V_MFMA_I32_4X4X4I8.
# No S_NOP is expected between the two.
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_xdl_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm32x32_mfma_write_agpr_xdl_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X1F32 writes $agpr2..$agpr33, which partially overlaps the
# $agpr0..$agpr15 third source of the following V_MFMA_F32_32X32X2F32.
# Expected: two S_NOP 7.
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_nonxdl_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm32x32_mfma_write_agpr_nonxdl_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X2F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X4F16 writes $agpr2..$agpr33, which partially overlaps the
# $agpr0..$agpr3 accumulator of the following SMFMAC.
# Expected: two S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_SMFMAC
name:            xdl_sgemm32x32_mfma_write_agpr_smfmac_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31_agpr32_agpr33 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $vgpr2..$vgpr9, which partially overlaps the
# $vgpr0..$vgpr7 third source of the following V_MFMA_F64_16X16X4F64.
# Expected: S_NOP 7 + S_NOP 0 on gfx940; S_NOP 7 + S_NOP 7 + S_NOP 0 on gfx950.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
# GCN:      V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name:            dgemm16x16_mfma_write_vgpr_mfma_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $vgpr2_vgpr3, which lies inside the
# $vgpr0..$vgpr7 third source of the following V_MFMA_F64_16X16X4F64.
# Expected: S_NOP 3.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_vgpr_mfma_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 writes $vgpr2..$vgpr9, which partially overlaps the
# $vgpr0..$vgpr3 third source of the following V_MFMA_F32_4X4X1F32.
# Expected: S_NOP 7 + S_NOP 0 on gfx940; S_NOP 7 + S_NOP 7 + S_NOP 0 on gfx950.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
# GCN:      V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 0
# GCN-NEXT: V_MFMA
name:            dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $vgpr2_vgpr3, which lies inside the
# $vgpr0..$vgpr3 third source of the following V_MFMA_F32_4X4X1F32.
# Expected: S_NOP 3.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_vgpr_sgemm_mfma_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $vgpr0..$vgpr3, which lies inside the
# $vgpr0..$vgpr7 third source of the following V_MFMA_F64_16X16X4F64.
# Expected: S_NOP 2.
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name:            sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr10, $vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $vgpr0..$vgpr15, which covers the
# $vgpr0..$vgpr7 third source of the following V_MFMA_F64_16X16X4F64.
# Expected: S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16x16_mfma_write_vgpr_dgemm_mfma_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr20_vgpr21, $vgpr20_vgpr21, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X4F16 writes $vgpr0..$vgpr31, which covers the
# $vgpr0..$vgpr7 third source of the following V_MFMA_F64_16X16X4F64.
# Expected: two S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm32x32_mfma_write_vgpr_dgemm_mfma_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr120_vgpr121, $vgpr120_vgpr121, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $agpr0..$agpr15; the following
# V_MFMA_F32_4X4X1F32 reads only $agpr0..$agpr3 of that result as its third
# source. Expected: S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16x16_mfma_write_agpr_mfma_read_partial
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $vgpr0..$vgpr15; the following
# V_MFMA_F32_4X4X1F32 reads only $vgpr0..$vgpr3 of that result as its third
# source. Expected: S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16x16_mfma_write_vgpr_mfma_read_partial
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $agpr0..$agpr3; the following MFMA reads $agpr1
# as its first source. Expected: S_NOP 4.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes $agpr0..$agpr15; the following MFMA reads
# $agpr1 as its first source. Expected: S_NOP 7 + S_NOP 2.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 writes $agpr0..$agpr15; the following MFMA reads
# $agpr1 as its first source. Expected: S_NOP 7 + S_NOP 1.
# GCN-LABEL: name: nonxdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_16X16X1F32_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# V_SMFMAC_I32_32X32X32_I8 writes $agpr0..$agpr15; the following MFMA reads
# $agpr1 as its first source. Expected: S_NOP 7 + S_NOP 2.
# GCN-LABEL: name: smfmac32x32_write_agpr_mfma_srca_read_overlap
# GCN:      V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name:            smfmac32x32_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# Back-to-back V_SMFMAC_I32_32X32X32_I8 on the identical $vgpr0..$vgpr15
# tuple; the second one also reads $vgpr2 (written by the first) as its third
# operand. Expected: S_NOP 7 + S_NOP 2.
# NOTE(review): the test name says "agpr" but the result tuple is VGPRs.
# GCN-LABEL: name: smfmac32x32_write_agpr_smfmac_srcc_read_overlap
# GCN:      V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name:            smfmac32x32_write_agpr_smfmac_srcc_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X4F16 writes $agpr0..$agpr31; the following MFMA reads
# $agpr1 as its first source. Expected: S_NOP 7 + S_NOP 7 + S_NOP 2.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name:            xdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr20_agpr21_agpr22_agpr23 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr20_agpr21_agpr22_agpr23, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X1F32 writes $agpr0..$agpr31; the following MFMA reads
# $agpr1 as its first source. Expected: S_NOP 7 + S_NOP 7 + S_NOP 1.
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            nonxdl_sgemm32x32_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32_e64 $vgpr26, $vgpr28, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr120_agpr121_agpr122_agpr123 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr120_agpr121_agpr122_agpr123, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $vgpr0..$vgpr3; the following MFMA reads $vgpr0
# as its first source. Expected: S_NOP 4.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr0, $agpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $vgpr0..$vgpr3; the following V_MFMA_F64_4X4X4F64
# reads $vgpr0_vgpr1 as its first source. Expected: S_NOP 4.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dmfma4x4_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_vgpr_dmfma4x4_srca_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $vgpr0..$vgpr3; the following
# V_MFMA_F64_16X16X4F64 reads $vgpr0_vgpr1 as its first source.
# Expected: S_NOP 4.
# The consumer previously was V_MFMA_F64_4X4X4F64, an exact copy of the
# dmfma4x4 variant of this test, so the 16x16 DGEMM named here was never
# exercised. The consumer's result and third source use $vgpr10..$vgpr17 so
# that the first source is the only register shared with the producer.
# NOTE(review): S_NOP 4 is carried over from the sibling srca/srcb tests with
# the same producer - confirm by running the test.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dmfma16x16_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_vgpr_dmfma16x16_srca_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes $vgpr0..$vgpr3; the following SMFMAC reads
# $vgpr0_vgpr1 as its first source. Expected: S_NOP 4.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_SMFMAC
name:            xdl_sgemm4x4_mfma_write_vgpr_smfmac_srca_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 writes $vgpr2_vgpr3; the following
# V_MFMA_F64_16X16X4F64 reads $vgpr2_vgpr3 as its first source.
# Expected: S_NOP 5.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_vgpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr2_vgpr3, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr0-vgpr7; the next 16x16 f64 MFMA reads vgpr0_vgpr1
# as its first source. The gfx940 run expects S_NOP 7, S_NOP 2; the gfx950 run
# expects S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srca_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 f64 MFMA writes vgpr4_vgpr5; a 4x4 f32 MFMA then reads vgpr4 as its
# first source. The checks expect S_NOP 5 between the two.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
body: |
  bb.0:
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr0-vgpr7; a 4x4 f32 MFMA then reads vgpr4 as its
# first source. The gfx940 run expects S_NOP 7, S_NOP 2; the gfx950 run expects
# S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a 4x4 f64 MFMA then reads vgpr0_vgpr1 as
# its first source. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_dgemm_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_dgemm_mfma_srca_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes agpr0-agpr3; a 4x4 f32 MFMA then reads agpr0 as its
# second source. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
body: |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $agpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a 4x4 f32 MFMA then reads vgpr0 as its
# second source. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 f64 MFMA writes vgpr2_vgpr3; the next 4x4 f64 MFMA reads vgpr2_vgpr3 as
# its second source. The checks expect S_NOP 5 between the two.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: dgemm4x4_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 f64 MFMA writes vgpr2_vgpr3; a V_SMFMAC then reads vgpr2-vgpr5 as its
# second source. The checks expect S_NOP 5 between the two.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac_srcb_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_SMFMAC
name: dgemm4x4_mfma_write_vgpr_smfmac_srcb_read_overlap
body: |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A 4x4 f64 MFMA writes vgpr2_vgpr3; a V_SMFMAC then reads vgpr2 as its third
# source. The checks expect S_NOP 5 between the two.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_SMFMAC
name: dgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr2, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr0-vgpr7; the next 16x16 f64 MFMA reads vgpr0_vgpr1
# as its second source. The gfx940 run expects S_NOP 7, S_NOP 2; the gfx950 run
# expects S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfma_srcb_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr0_vgpr1, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr0-vgpr7; a V_SMFMAC then reads vgpr2-vgpr5 as its
# second source. The gfx940 run expects S_NOP 7, S_NOP 2; the gfx950 run
# expects S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcb_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr0-vgpr7; a V_SMFMAC then reads vgpr2 as its third
# source. The gfx940 run expects S_NOP 7, S_NOP 2; the gfx950 run expects
# S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2

# GCN-NEXT: V_SMFMAC
name: dgemm16x16_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr2, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_SMFMAC then reads vgpr1 as its third
# source. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_SMFMAC
name: xdl_sgemm4x4_mfma_write_vgpr_smfmac_srcc_read_overlap
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr1, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a buffer store then uses vgpr0 as its first
# operand. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: BUFFER_STORE_DWORD
name: xdl_smfma4x4_write_vgpr_vm_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a flat store then uses vgpr0_vgpr1 as its
# first operand. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: FLAT_STORE_DWORD
name: xdl_smfma4x4_write_vgpr_flat_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a DS write then uses vgpr0 as its first
# operand. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_lds_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: DS_WRITE_B32
name: xdl_smfma4x4_write_vgpr_lds_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    DS_WRITE_B32 $vgpr0, $vgpr4, 0, 0, implicit $m0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; an EXP_DONE then reads vgpr0. The checks
# expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_exp_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: EXP_DONE
name: xdl_smfma4x4_write_vgpr_exp_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    EXP_DONE 12, $vgpr4, $vgpr0, $vgpr0, $vgpr0, 0, 0, 15, implicit $exec
...
# A 16x16 V_SMFMAC writes vgpr0-vgpr3; a flat store then uses vgpr1 as its
# second operand. The checks expect S_NOP 6 between the two.
# GCN-LABEL: name: smfmac16x16_write_vgpr_flat_read
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 6
# GCN-NEXT: FLAT_STORE_DWORD
name: smfmac16x16_write_vgpr_flat_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 16x16 i8 MFMA writes vgpr0-vgpr15; a flat store then uses vgpr1 as its
# second operand. The checks expect S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: FLAT_STORE_DWORD
name: xdl_smfma16x16_write_vgpr_flat_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 32x32 V_SMFMAC writes vgpr0-vgpr15; a flat store then uses vgpr1 as its
# second operand. The checks expect S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: smfmac32x32_write_vgpr_flat_read
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: FLAT_STORE_DWORD
name: smfmac32x32_write_vgpr_flat_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr16_vgpr17, $vgpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 32x32 f16 MFMA writes agpr0-agpr31; a flat store then uses agpr1 as its
# second operand. The checks expect S_NOP 7, S_NOP 7, S_NOP 2 between the two.
# NOTE(review): the test name says "vgpr", but the registers written and then
# stored here are AGPRs. Confirm the name or the operands.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: FLAT_STORE_DWORD
name: xdl_smfma32x32_write_vgpr_flat_read
body: |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr16_vgpr17, $agpr1, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 4x4 f64 MFMA writes vgpr4_vgpr5; a flat store then uses only vgpr5 as its
# second operand. The checks expect S_NOP 7, S_NOP 0 between the two.
# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma4x4_write_vgpr_flat_read_overlap
body: |
  bb.0:
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 4x4 f64 MFMA writes vgpr4_vgpr5; a two-dword flat store then uses the whole
# vgpr4_vgpr5 pair as its second operand. The checks expect S_NOP 7, S_NOP 0.
# GCN-LABEL: name: dmfma4x4_write_vgpr_flat_read_full
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma4x4_write_vgpr_flat_read_full
body: |
  bb.0:
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 16x16 f64 MFMA writes vgpr2-vgpr9; a flat store then uses vgpr4 as its
# second operand. The checks expect S_NOP 7, S_NOP 7, S_NOP 1 on both runs.
# GCN-LABEL: name: dmfma16x16_write_vgpr_flat_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_STORE_DWORD
name: dmfma16x16_write_vgpr_flat_read
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_MOV then reads vgpr0. The checks expect
# S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MOV_B32
name: xdl_smfma4x4_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# A 16x16 i8 MFMA writes vgpr0-vgpr15; a V_MOV then reads vgpr0. The checks
# expect S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma16x16_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# A 32x32 f16 MFMA writes vgpr0-vgpr31; a V_MOV then reads vgpr0. The checks
# expect S_NOP 7, S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma32x32_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr16 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# A 4x4 f64 MFMA writes vgpr4_vgpr5; a V_MOV then reads vgpr5. The checks
# expect S_NOP 5 between the two.
# GCN-LABEL: name: dmfma4x4_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MOV_B32
name: dmfma4x4_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr6 = V_MOV_B32_e32 $vgpr5, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr2-vgpr9; a V_MOV then reads vgpr4. The gfx940 run
# expects S_NOP 7, S_NOP 2; the gfx950 run expects S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_read
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: dmfma16x16_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr12 = V_MOV_B32_e32 $vgpr4, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_ACCVGPR_WRITE then reads vgpr0. The
# checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_accv_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: xdl_smfma4x4_write_vgpr_accv_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec
...
# A 16x16 i8 MFMA writes vgpr0-vgpr15; a V_ACCVGPR_WRITE then reads vgpr0. The
# checks expect S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_accv_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: xdl_smfma16x16_write_vgpr_accv_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec
...
# A 32x32 f16 MFMA writes vgpr0-vgpr31; a V_ACCVGPR_WRITE then reads vgpr0. The
# checks expect S_NOP 7, S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_accv_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: xdl_smfma32x32_write_vgpr_accv_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_DOT4C then reads vgpr0 and vgpr1. The
# checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_dot_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_DOT
name: xdl_smfma4x4_write_vgpr_dot_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# A 4x4 f64 MFMA writes vgpr4_vgpr5; a V_DOT4C then reads vgpr5 as its second
# source. The checks expect S_NOP 5 between the two.
# GCN-LABEL: name: dmfma4x4_write_vgpr_dot_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_DOT
name: dmfma4x4_write_vgpr_dot_read
body: |
  bb.0:
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr5, $vgpr1, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr2-vgpr9; a V_DOT4C then reads vgpr4 as its second
# source. The gfx940 run expects S_NOP 7, S_NOP 2; the gfx950 run expects
# S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: dmfma16x16_write_vgpr_dot_read
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2

# GCN-NEXT: V_DOT
name: dmfma16x16_write_vgpr_dot_read
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr4, $vgpr1, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_MOV then overwrites vgpr1. The checks
# expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MOV_B32
name: xdl_smfma4x4_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr0, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 16x16 i8 MFMA writes vgpr0-vgpr15; a V_MOV then overwrites vgpr1. The
# checks expect S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma16x16_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 32x32 f16 MFMA writes vgpr0-vgpr31; a V_MOV then overwrites vgpr1. The
# checks expect S_NOP 7, S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_smfma32x32_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_FMA_F16 then overwrites vgpr1. The
# checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_f16_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_FMA_F16_e64
name: xdl_smfma4x4_write_vgpr_valu_f16_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 i8 MFMA writes vgpr0-vgpr15; a V_FMA_F16 then overwrites vgpr1. The
# checks expect S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_f16_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_FMA_F16_e64
name: xdl_smfma16x16_write_vgpr_valu_f16_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A 32x32 f16 MFMA writes vgpr0-vgpr31; a V_FMA_F16 then overwrites vgpr1. The
# checks expect S_NOP 7, S_NOP 7, S_NOP 2 between the two.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_f16_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_FMA_F16_e64
name: xdl_smfma32x32_write_vgpr_valu_f16_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_FMA_F16_e64 0, 0, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; an SDWA V_MOV then writes vgpr1, which is
# also its tied implicit use. The checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_valu_sdwa_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MOV_B32_sdwa
name: xdl_smfma4x4_write_vgpr_valu_sdwa_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0)
...
# A 16x16 i8 MFMA writes vgpr0-vgpr15; an SDWA V_MOV then writes vgpr1, which
# is also its tied implicit use. The checks expect S_NOP 7, S_NOP 2.
# GCN-LABEL: name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_sdwa
name: xdl_smfma16x16_write_vgpr_valu_sdwa_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $vgpr16, $vgpr17, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0)
...
# A 32x32 f16 MFMA writes vgpr0-vgpr31; an SDWA V_MOV then writes vgpr1, which
# is also its tied implicit use. The checks expect S_NOP 7, S_NOP 7, S_NOP 2.
# GCN-LABEL: name: xdl_smfma32x32_write_vgpr_valu_sdwa_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_sdwa
name: xdl_smfma32x32_write_vgpr_valu_sdwa_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_sdwa 0, $vgpr16, 0, 5, 2, 4, implicit $exec, implicit $vgpr1(tied-def 0)
...
# A 4x4 f64 MFMA writes vgpr4_vgpr5; a V_MOV then overwrites vgpr4. The checks
# expect S_NOP 5 between the two.
# GCN-LABEL: name: dmfma4x4_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MOV_B32
name: dmfma4x4_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 16x16 f64 MFMA writes vgpr2-vgpr9; a V_MOV then overwrites vgpr3. The
# checks expect S_NOP 7, S_NOP 2 on both runs.
# GCN-LABEL: name: dmfma16x16_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: dmfma16x16_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr3 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_ACCVGPR_READ then overwrites vgpr1. The
# checks expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_accv_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_ACCVGPR_READ_B32_e64
name: xdl_smfma4x4_write_vgpr_accv_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $mode, implicit $exec
...
# A 4x4 i8 MFMA writes vgpr0-vgpr3; a V_DOT8 then overwrites vgpr1. The checks
# expect S_NOP 4 between the two.
# GCN-LABEL: name: xdl_smfma4x4_write_vgpr_dot_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_DOT
name: xdl_smfma4x4_write_vgpr_dot_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_DOT8_I32_I4 0, $vgpr4, 0, $vgpr4, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 f32 MFMA reads vgpr0-vgpr3 as its third source (destination is
# vgpr4-vgpr7); a V_MOV then overwrites vgpr1. The checks expect the V_MOV to
# follow immediately, with no S_NOP.
# GCN-LABEL: name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma4x4_read_srcc_vgpr_valu_write
body: |
  bb.0:
    $vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 16x16 f32 MFMA reads vgpr0-vgpr15 as its third source (destination is
# vgpr2-vgpr17); a V_MOV then overwrites vgpr1. The checks expect the V_MOV to
# follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma16x16_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma16x16_read_srcc_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 32x32 f32 MFMA reads vgpr0-vgpr15 as its third source (destination is
# vgpr2-vgpr17); a V_MOV then overwrites vgpr1. The checks expect the V_MOV to
# follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma32x32_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma32x32_read_srcc_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr0, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 4x4 f32 MFMA reads vgpr8 as its first source; a V_MOV then overwrites
# vgpr8. The checks expect the V_MOV to follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma4x4_read_srca_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma4x4_read_srca_vgpr_valu_write
body: |
  bb.0:
    $vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr8 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 16x16 f32 MFMA reads vgpr18 as its first source; a V_MOV then overwrites
# vgpr18. The checks expect the V_MOV to follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma16x16_read_srca_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma16x16_read_srca_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr18 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 32x32 f32 MFMA reads vgpr18 as its first source; a V_MOV then overwrites
# vgpr18. The checks expect the V_MOV to follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma32x32_read_srca_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma32x32_read_srca_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr18 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 4x4 f32 MFMA reads vgpr9 as its second source; a V_MOV then overwrites
# vgpr9. The checks expect the V_MOV to follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma4x4_read_srcb_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma4x4_read_srcb_vgpr_valu_write
body: |
  bb.0:
    $vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr9 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 16x16 f32 MFMA reads vgpr19 as its second source; a V_MOV then overwrites
# vgpr19. The checks expect the V_MOV to follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma16x16_read_srcb_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma16x16_read_srcb_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr19 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 32x32 f32 MFMA reads vgpr19 as its second source; a V_MOV then overwrites
# vgpr19. The checks expect the V_MOV to follow immediately, with no S_NOP.
# GCN-LABEL: name: smfma32x32_read_srcb_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: smfma32x32_read_srcb_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_32X32X2F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr19 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 4x4 f64 MFMA reads vgpr0_vgpr1 for all three sources; a V_MOV then
# overwrites vgpr1. The checks expect the V_MOV to follow immediately, with no
# S_NOP.
# GCN-LABEL: name: dmfma4x4_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: dmfma4x4_read_srcc_vgpr_valu_write
body: |
  bb.0:
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 16x16 f64 MFMA reads vgpr0-vgpr7 as its third source (destination is
# vgpr2-vgpr9); a V_MOV then overwrites vgpr1. The checks expect the V_MOV to
# follow immediately, with no S_NOP.
# GCN-LABEL: name: dmfma16x16_read_srcc_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: dmfma16x16_read_srcc_vgpr_valu_write
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# A 16x16 f32 MFMA reads agpr0-agpr15 as its third source (destination is
# agpr2-agpr17); a V_ACCVGPR_WRITE then overwrites agpr1. The checks expect it
# to follow immediately, with no S_NOP.
# NOTE(review): the test name says "vgpr", but every register in this body is
# an AGPR. Confirm the name or the operands.
# GCN-LABEL: name: smfma16x16_read_srcc_vgpr_accv_write
# GCN: V_MFMA
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name: smfma16x16_read_srcc_vgpr_accv_write
body: |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_MFMA_F32_16X16X1F32_e64 $agpr18, $agpr19, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 followed by a V_FMA_F64 that shares no registers with
# it; no S_NOP is expected between the two.
# GCN-LABEL: name: sgemm_to_fma64
# GCN:      V_MFMA
# GCN-NEXT: V_FMA_F64_e64
name:            sgemm_to_fma64
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 followed by a V_FMA_F64 on unrelated registers;
# S_NOP 1 is expected between the two.
# GCN-LABEL: name: dgemm_to_fma64
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMA_F64_e64
name:            dgemm_to_fma64
body:             |
  bb.0:
    $vgpr0_vgpr1 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_4X4X4F64 followed by a V_FMAC_F64 on unrelated registers;
# S_NOP 1 is expected between the two.
# GCN-LABEL: name: dgemm_to_fmac64
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMAC_F64
name:            dgemm_to_fmac64
body:             |
  bb.0:
    $vgpr0_vgpr1 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec
...
# FLAT_STORE_DWORDX4 uses $agpr0_agpr1_agpr2_agpr3 as store data and the
# following V_ACCVGPR_WRITE overwrites $agpr0; S_NOP 1 is expected.
# GCN-LABEL: name: flat_store_data_agpr_overwritten
# GCN:      FLAT_STORE_DWORDX4
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name:            flat_store_data_agpr_overwritten
body:             |
  bb.0:
    FLAT_STORE_DWORDX4 $vgpr4_vgpr5, $agpr0_agpr1_agpr2_agpr3, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
    $agpr0 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# V_DOT4C writes $vgpr4, which the following V_ACCVGPR_WRITE reads;
# S_NOP 2 is expected between the two.
# GCN-LABEL: name: dot_write_vgpr_accv_read
# GCN:      V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name:            dot_write_vgpr_accv_read
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $agpr5 = V_ACCVGPR_WRITE_B32_e64 $vgpr4, implicit $exec
...
# V_MOV writes $vgpr0, which the following V_DOT4C reads; no S_NOP is
# expected between the two.
# GCN-LABEL: name: valu_write_vgpr_dot_read
# GCN:      V_MOV_B32
# GCN-NEXT: V_DOT
name:            valu_write_vgpr_dot_read
body:             |
  bb.0:
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# V_ACCVGPR_READ writes $vgpr0, which the following V_DOT4C reads; no S_NOP
# is expected between the two.
# GCN-LABEL: name: accv_write_vgpr_dot_read
# GCN:      V_ACCVGPR_READ
# GCN-NEXT: V_DOT
name:            accv_write_vgpr_dot_read
body:             |
  bb.0:
    $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# Two identical V_DOT4C instructions; the second reads the first's result
# $vgpr4 as its last source. No S_NOP is expected.
# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srcc
# GCN:      V_DOT
# GCN-NEXT: V_DOT
name:            dot_write_vgpr_same_dot_read_srcc
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# V_DOT4C writes $vgpr4 and a different dot opcode (V_DOT8) reads $vgpr4 as
# its last source; S_NOP 2 is expected.
# GCN-LABEL: name: dot_write_vgpr_different_dot_read_srcc
# GCN:      V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name:            dot_write_vgpr_different_dot_read_srcc
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $vgpr1 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $mode, implicit $exec
...
# V_DOT4C writes $vgpr4 and a different dot opcode (V_DOT8) writes $vgpr4
# again; S_NOP 2 is expected.
# GCN-LABEL: name: dot_write_vgpr_different_dot_write
# GCN:      V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name:            dot_write_vgpr_different_dot_write
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $vgpr4 = V_DOT8_I32_I4 0, $vgpr0, 0, $vgpr0, 0, $vgpr0, 0, 0, 0, implicit $mode, implicit $exec
...
# V_DOT4C writes $vgpr4, which the following V_MOV reads; S_NOP 2 is
# expected.
# GCN-LABEL: name: dot_write_vgpr_different_valu_read
# GCN:      V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_e32
name:            dot_write_vgpr_different_valu_read
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $vgpr0 = V_MOV_B32_e32 $vgpr4, implicit $exec
...
# V_DOT4C writes $vgpr4 and the following V_MOV overwrites $vgpr4; S_NOP 2
# is expected.
# GCN-LABEL: name: dot_write_vgpr_different_valu_write
# GCN:      V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32_e32
name:            dot_write_vgpr_different_valu_write
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $vgpr4 = V_MOV_B32_e32 1, implicit $exec
...
# V_DOT4C writes $vgpr4 and a second V_DOT4C reads it as its first source;
# S_NOP 2 is expected.
# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srca
# GCN:      V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name:            dot_write_vgpr_same_dot_read_srca
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $vgpr0 = V_DOT4C_I32_I8_e32 $vgpr4, $vgpr1, $vgpr0, implicit $mode, implicit $exec
...
# V_DOT4C writes $vgpr4 and a second V_DOT4C reads it as its second source;
# S_NOP 2 is expected.
# GCN-LABEL: name: dot_write_vgpr_same_dot_read_srcb
# GCN:      V_DOT
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_DOT
name:            dot_write_vgpr_same_dot_read_srcb
body:             |
  bb.0:
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
    $vgpr0 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr4, $vgpr0, implicit $mode, implicit $exec
...
# V_CMPX defines $exec and is followed by an MFMA (which uses $exec
# implicitly); S_NOP 3 is expected.
# GCN-LABEL: name: vcmpx_write_exec_mfma
# GCN:      V_CMPX_EQ_I32_e32
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name:            vcmpx_write_exec_mfma
body:             |
  bb.0:
    implicit $exec, implicit $vcc = V_CMPX_EQ_I32_e32 $vgpr0, $vgpr1, implicit $exec
    $agpr4_agpr5_agpr6_agpr7 = V_MFMA_F32_4X4X1F32_e64 killed $agpr8, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Two V_ACCVGPR_WRITEs define $agpr0 and $agpr1, which a 4x4 F64 MFMA then
# reads as $agpr0_agpr1; S_NOP 1 is expected after the second write.
# GCN-LABEL: name: valu_write_agpr_dgemm_mfma_read
# GCN:      V_ACCVGPR_WRITE_B32_e64
# GCN:      V_ACCVGPR_WRITE_B32_e64
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name:            valu_write_agpr_dgemm_mfma_read
body:             |
  bb.0:
    $agpr0 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
    $agpr1 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
    $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# Two back-to-back 16x16 F64 MFMAs whose destination and srcC are the same
# AGPR tuple; no S_NOP is expected.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_same_agpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: V_MFMA
name:            dgemm16x16_mfma_write_agpr_mfma_read_same_agpr_as_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $agpr10_agpr11, $agpr10_agpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $agpr10_agpr11, $agpr10_agpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# Two back-to-back 4x4 F64 MFMAs whose destination and srcC are the same
# AGPR pair; S_NOP 3 is expected.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_agpr_mfma_read_same_agpr_as_srcc
body:             |
  bb.0:
    $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA writes agpr2..agpr9 and a second one reads agpr0..agpr7
# as srcC (partial overlap). The expected S_NOP sequence differs between
# gfx940 and gfx950.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 0
# GCN-NEXT:    V_MFMA
name:            dgemm16x16_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA writes $agpr2_agpr3 and a 16x16 F64 MFMA reads agpr0..agpr7
# as srcC (overlap); S_NOP 3 is expected.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA writes agpr2..agpr9 and a 4x4 F32 MFMA reads agpr0..agpr3
# as srcC (overlap). The expected S_NOP sequence differs between gfx940 and
# gfx950.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 0

# GCN-NEXT:    V_MFMA
name:            dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr10, $vgpr11, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes agpr0..agpr3 and a 16x16 F64 MFMA reads
# agpr0..agpr7 as srcC (overlap); S_NOP 2 is expected.
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name:            sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr10, $vgpr11, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X4I8 writes agpr0..agpr15 and a 16x16 F64 MFMA reads
# agpr0..agpr7 as srcC. The trailing S_NOP differs between gfx940 and gfx950.
# NOTE(review): the test name says "sgpr" but only AGPRs are involved;
# presumably a typo for "agpr" - confirm before renaming.
# GCN-LABEL: name: xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap
# GCN:         V_MFMA
# GCN-NEXT:    S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT:    V_MFMA
name:            xdl_sgemm16x16_mfma_write_sgpr_dgemm_mfma_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_I32_16X16X4I8_e64 $vgpr26, $vgpr27, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr20_vgpr21, $vgpr20_vgpr21, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X4F16 writes agpr0..agpr31 and a 16x16 F64 MFMA reads
# agpr0..agpr7 as srcC. The trailing S_NOP differs between gfx940 and gfx950.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap
# GCN:         V_MFMA
# GCN-NEXT:    S_NOP 7
# GCN-NEXT:    S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT:    V_MFMA
name:            xdl_sgemm32x32_mfma_write_agpr_dgemm_mfma_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr26_vgpr27, $vgpr28_vgpr29, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr120_vgpr121, $vgpr120_vgpr121, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes agpr0..agpr3 and a 4x4 F64 MFMA reads
# $agpr0_agpr1 as srcA; S_NOP 4 is expected.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dmfma4x4_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_agpr_dmfma4x4_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes agpr0..agpr3 and a 16x16 F64 MFMA reads
# $agpr0_agpr1 as srcA; S_NOP 4 is expected.
# The consumer was previously V_MFMA_F64_4X4X4F64, which made this test a
# byte-for-byte duplicate of the dmfma4x4 variant above; it now uses the
# 16x16 DGEMM opcode that the test name refers to.
# NOTE(review): the S_NOP count is kept on the assumption that the srcA/B
# wait states depend only on the producing MFMA - confirm by running the test.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dmfma16x16_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_agpr_dmfma16x16_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA writes $agpr2_agpr3 and a 16x16 F64 MFMA reads $agpr2_agpr3
# as srcA; S_NOP 5 is expected.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr2_agpr3, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA writes agpr0..agpr7 and a second 16x16 F64 MFMA reads
# $agpr0_agpr1 as srcA. The expected S_NOP sequence differs between gfx940
# and gfx950.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT:    V_MFMA
name:            dgemm16x16_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr10_vgpr11, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA writes $agpr4_agpr5 and a 4x4 F32 MFMA reads $agpr4 as srcA;
# S_NOP 5 is expected.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_sgemm_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_agpr_sgemm_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $agpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA writes agpr0..agpr7 and a 4x4 F32 MFMA reads $agpr4 as
# srcA. The expected S_NOP sequence differs between gfx940 and gfx950.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2

# GCN-NEXT:    V_MFMA
name:            dgemm16x16_mfma_write_agpr_sgemm_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $agpr4, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 writes agpr0..agpr3 and a 4x4 F64 MFMA reads
# $agpr0_agpr1 as srcA; S_NOP 4 is expected.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_dgemm_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name:            xdl_sgemm4x4_mfma_write_agpr_dgemm_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr4, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $agpr0_agpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA writes $agpr2_agpr3 and a second 4x4 F64 MFMA reads
# $agpr2_agpr3 as srcB; S_NOP 5 is expected.
# GCN-LABEL: name: dgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name:            dgemm4x4_mfma_write_agpr_mfma_srcb_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $agpr2_agpr3, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA writes agpr0..agpr7 and a second 16x16 F64 MFMA reads
# $agpr0_agpr1 as srcB. The expected S_NOP sequence differs between gfx940
# and gfx950.
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT:    V_MFMA
name:            dgemm16x16_mfma_write_agpr_mfma_srcb_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $agpr0_agpr1, $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA writes $agpr4_agpr5 and FLAT_STORE_DWORD stores $agpr5 (half
# of the result); S_NOP 7 followed by S_NOP 0 is expected.
# GCN-LABEL: name: dmfma4x4_write_agpr_flat_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name:            dmfma4x4_write_agpr_flat_read_overlap
body:             |
  bb.0:
    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 4x4 F64 MFMA writes $agpr4_agpr5 and FLAT_STORE_DWORDX2 stores the whole
# pair; S_NOP 7 followed by S_NOP 0 is expected.
# GCN-LABEL: name: dmfma4x4_write_agpr_flat_read_full
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 0
# GCN-NEXT: FLAT_STORE_DWORD
name:            dmfma4x4_write_agpr_flat_read_full
body:             |
  bb.0:
    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORDX2 $vgpr0_vgpr1, $agpr4_agpr5, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 16x16 F64 MFMA writes agpr2..agpr9 and FLAT_STORE_DWORD stores $agpr4;
# S_NOP 7, S_NOP 7, S_NOP 1 are expected.
# GCN-LABEL: name: dmfma16x16_write_agpr_flat_read
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: FLAT_STORE_DWORD
name:            dmfma16x16_write_agpr_flat_read
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
    FLAT_STORE_DWORD $vgpr0_vgpr1, $agpr4, 0, 0, implicit $mode, implicit $exec, implicit $flat_scr
...
# A 4x4 F64 MFMA writes $agpr4_agpr5 and V_ACCVGPR_READ reads $agpr5;
# S_NOP 5 is expected.
# GCN-LABEL: name: dmfma4x4_write_agpr_valu_read
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_ACCVGPR_READ_B32_e64
name:            dmfma4x4_write_agpr_valu_read
body:             |
  bb.0:
    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr5, implicit $exec
...
# A 16x16 F64 MFMA writes agpr2..agpr9 and V_ACCVGPR_READ reads $agpr4. The
# expected S_NOP sequence differs between gfx940 and gfx950.
# GCN-LABEL: name: dmfma16x16_write_agpr_valu_read
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 2

# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 7
# GFX950-NEXT: S_NOP 2
# GCN-NEXT:    V_ACCVGPR_READ_B32_e64
name:            dmfma16x16_write_agpr_valu_read
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
...
# A 4x4 F64 MFMA writes $agpr4_agpr5 and V_ACCVGPR_WRITE overwrites $agpr4;
# S_NOP 5 is expected.
# GCN-LABEL: name: dmfma4x4_write_agpr_valu_write
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 5
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name:            dmfma4x4_write_agpr_valu_write
body:             |
  bb.0:
    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $agpr4 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA writes agpr2..agpr9 and V_ACCVGPR_WRITE overwrites $agpr3;
# S_NOP 7 followed by S_NOP 2 is expected.
# GCN-LABEL: name: dmfma16x16_write_agpr_valu_write
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name:            dmfma16x16_write_agpr_valu_write
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
    $agpr3 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA reads $agpr0_agpr1 as srcC and V_ACCVGPR_WRITE overwrites
# $agpr1; no S_NOP is expected.
# GCN-LABEL: name: dmfma4x4_read_srcc_agpr_valu_write
# GCN:      V_MFMA
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name:            dmfma4x4_read_srcc_agpr_valu_write
body:             |
  bb.0:
    $agpr4_agpr5 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA reads $agpr1 as part of its srcC tuple and
# V_ACCVGPR_WRITE overwrites $agpr1; no S_NOP is expected.
# GCN-LABEL: name: dmfma16x16_read_srcc_agpr_valu_write
# GCN:      V_MFMA
# GCN-NEXT: V_ACCVGPR_WRITE_B32_e64
name:            dmfma16x16_read_srcc_agpr_valu_write
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = V_MFMA_F64_16X16X4F64_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, 0, 0, 0, implicit $mode, implicit $exec
    $agpr1 = V_ACCVGPR_WRITE_B32_e64 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA with an AGPR destination followed by a V_FMA_F64 on
# unrelated VGPRs; S_NOP 1 is expected.
# GCN-LABEL: name: dgemm_accvgr_to_fma64
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMA_F64_e64
name:            dgemm_accvgr_to_fma64
body:             |
  bb.0:
    $agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5 = V_FMA_F64_e64 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, $vgpr4_vgpr5, 0, 0, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA operating purely on AGPRs followed by a V_FMAC_F64 on
# unrelated VGPRs; S_NOP 1 is expected.
# GCN-LABEL: name: dgemm_accvgr_to_fmac64
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_FMAC_F64
name:            dgemm_accvgr_to_fmac64
body:             |
  bb.0:
    $agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4_vgpr5 = V_FMAC_F64_e32 $vgpr4_vgpr5, $vgpr4_vgpr5, $vgpr4_vgpr5, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X16F16 writes agpr2..agpr5 and a second one reads
# agpr0..agpr3 as srcC (partial overlap); S_NOP 4 on gfx940, S_NOP 5 on
# gfx950.
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT:    V_MFMA
name:            sgemm16X16X16_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 writes agpr2..agpr5 and a second one reads
# agpr0..agpr3 as srcC (partial overlap); S_NOP 4 on gfx940, S_NOP 5 on
# gfx950.
# GCN-LABEL: name: sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT:    V_MFMA
name:            sgemm16X16X32_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X16F16 (vgprcd form) writes vgpr2..vgpr5 and a 4x4 F64 MFMA
# reads $vgpr2_vgpr3 as srcC; S_NOP 4 on gfx940, S_NOP 5 on gfx950.
# NOTE(review): the name says "agpr" but this test uses VGPRs throughout.
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT:    V_MFMA
name:            sgemm16X16X16_mfma_write_agpr_dgemm_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr8_vgpr9, $vgpr8_vgpr9, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr6_vgpr7 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X16F16 writes agpr2..agpr5 and a V_SMFMAC accumulates into
# agpr0..agpr3 (partial overlap); S_NOP 4 on gfx940, S_NOP 5 on gfx950.
# GCN-LABEL: name: sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
# GCN:         V_MFMA
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT:    V_SMFMAC
name:            sgemm16X16X16_mfma_write_agpr_smfmac_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_16X16X16F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# One V_SMFMAC writes agpr2..agpr5 and a second accumulates into
# agpr0..agpr3 (partial overlap); S_NOP 4 on gfx940, S_NOP 5 on gfx950.
# GCN-LABEL: name: smfmac16x16_write_agpr_smfmac_read_overlap
# GCN:         V_SMFMAC
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT:    V_SMFMAC
name:            smfmac16x16_write_agpr_smfmac_read_overlap
body:             |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 writes agpr0..agpr3 and a 4x4 F32 MFMA reads $agpr1
# as srcA; S_NOP 6 is expected.
# NOTE(review): the name says 16X16X16 while the producer opcode is
# 16X16X32I8 - confirm whether that is intentional.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16X16X16_mfma_write_agpr_mfma_srca_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 writes agpr0..agpr3 and a 4x4 F32 MFMA reads $agpr1
# as srcB; S_NOP 6 is expected.
# GCN-LABEL: name: xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16X16X32_mfma_write_agpr_mfma_srcb_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr0, $agpr1, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 (vgprcd form) writes vgpr0..vgpr3 and an F64 MFMA
# reads $vgpr0_vgpr1 as srcA; S_NOP 6 is expected.
# NOTE(review): the name says "dmfma16x16" but the consumer opcode is
# V_MFMA_F64_4X4X4F64 - confirm which was intended.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name:            xdl_sgemm16X16X16_mfma_write_vgpr_dmfma16x16_srca_read_overlap
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr4_vgpr5, $vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 (vgprcd form) writes vgpr0..vgpr3 and V_MOV
# overwrites $vgpr1; S_NOP 6 is expected.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name:            xdl_sgemm16X16X16_mfma_write_vgpr_valu_write
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# The same producer writes vgpr0..vgpr3 and BUFFER_STORE_DWORD stores $vgpr0;
# S_NOP 6 is expected.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: BUFFER_STORE_DWORD
name:            xdl_sgemm16X16X16_mfma_write_vgpr_vm_read
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# The same producer writes vgpr0..vgpr3 and V_MOV reads $vgpr0; S_NOP 6 is
# expected.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name:            xdl_sgemm16X16X16_mfma_write_vgpr_valu_read
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# The same producer writes vgpr0..vgpr3 and V_DOT4C reads $vgpr0 and $vgpr1;
# S_NOP 6 is expected.
# GCN-LABEL: name: xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
# GCN:      V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_DOT
name:            xdl_sgemm16X16X16_mfma_write_vgpr_dot_read
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_DOT4C_I32_I8_e32 $vgpr0, $vgpr1, $vgpr4, implicit $exec
...
# Two identical 16x16x32 V_SMFMACs accumulating into the same AGPR tuple
# back to back; no S_NOP is expected.
# GCN-LABEL: name: smfmac16x16x32_write_agpr_mfma_read_same_agpr_as_srcc
# GCN:      V_SMFMAC
# GCN-NEXT: V_SMFMAC
name:            smfmac16x16x32_write_agpr_mfma_read_same_agpr_as_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
...
# Two identical 32x32x32 V_SMFMACs accumulating into the same AGPR tuple
# back to back; no S_NOP is expected.
# GCN-LABEL: name: smfmac32x32x32_write_agpr_mfma_read_same_agpr_as_srcc
# GCN:      V_SMFMAC
# GCN-NEXT: V_SMFMAC
name:            smfmac32x32x32_write_agpr_mfma_read_same_agpr_as_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
...
# A 16x16x32 V_SMFMAC writes agpr0..agpr3 and a second one accumulates into
# agpr2..agpr5 (partial overlap); S_NOP 4 on gfx940, S_NOP 5 on gfx950.
# GCN-LABEL: name: smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
# GCN:         V_SMFMAC
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT:    V_SMFMAC
name:            smfmac16x16x32_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3, implicit $mode, implicit $exec
    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
...
# A 32x32x16 V_SMFMAC writes agpr0..agpr15 and a second one accumulates into
# agpr2..agpr17 (partial overlap). The trailing S_NOP differs between gfx940
# and gfx950.
# GCN-LABEL: name: smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
# GCN:         V_SMFMAC
# GCN-NEXT:    S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT:    V_SMFMAC
name:            smfmac32x32x32_mfma_write_agpr_mfma_read_overlap
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_SMFMAC_F32_32X32X16_BF16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $mode, implicit $exec
    $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17 = V_SMFMAC_F32_32X32X16_BF16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17, implicit $mode, implicit $exec
...
# A V_SMFMAC writes vgpr6..vgpr9 and a second V_SMFMAC reads $vgpr6 as its
# third source (the index operand, going by the test name); S_NOP 6 is
# expected.
# GCN-LABEL: name: smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
# GCN:      V_SMFMAC
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_SMFMAC
name:            smfmac16x16x32_mfma_write_vgpr_smfmac_read_idx
body:             |
  bb.0:
    $vgpr6_vgpr7_vgpr8_vgpr9 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr6_vgpr7_vgpr8_vgpr9, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr6, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
...
# A 4x4 F64 MFMA writes $vgpr2_vgpr3 and a V_SMFMAC accumulates into
# vgpr0..vgpr3 (overlap); the checks expect no S_NOP in between.
# GCN-LABEL: name: dgemm4x4_mfma_write_vgpr_smfmac16x16x32_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: V_SMFMAC
name:            dgemm4x4_mfma_write_vgpr_smfmac16x16x32_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3 = V_MFMA_F64_4X4X4F64_vgprcd_e64 $vgpr0_vgpr1, $vgpr0_vgpr1, $vgpr0_vgpr1, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_BF16_e64 $vgpr10_vgpr11, $vgpr12_vgpr13_vgpr14_vgpr15, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
...
# A 16x16 F64 MFMA writes vgpr2..vgpr9 and V_MFMA_I32_16X16X32I8 reads
# vgpr0..vgpr3 as srcC (overlap); the checks expect no S_NOP in between.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfmai8_read_overlap
# GCN:      V_MFMA
# GCN-NEXT: V_MFMA
name:            dgemm16x16_mfma_write_vgpr_mfmai8_read_overlap
body:             |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_I32_16X16X32I8_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F64_16X16X4F64 defines $vgpr2..$vgpr9; the following
# V_MFMA_F32_16X16X8XF32 reads $vgpr0..$vgpr3 as its third source operand
# (overlap in $vgpr2..$vgpr3). No S_NOP is expected between them.
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfmaxf32_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: dgemm16x16_mfma_write_vgpr_mfmaxf32_read_overlap
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = V_MFMA_F64_16X16X4F64_vgprcd_e64 $vgpr10_vgpr11, $vgpr10_vgpr11, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X8XF32_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 defines $agpr2..$agpr5; a second V_MFMA_F32_4X4X1F32 reads
# $agpr0..$agpr3 as its third source operand (overlap in $agpr2..$agpr3).
# Both runs expect S_NOP 1.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_nonxdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_agpr_nonxdl_mfma_read_overlap
body: |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Same producer as the previous test, but the consumer is V_MFMA_I32_4X4X4I8.
# Here the checks require the second MFMA to follow immediately (no S_NOP).
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_xdl_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_agpr_xdl_mfma_read_overlap
body: |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 defines $agpr2..$agpr5; the following V_MFMA_F32_4X4X1F32
# reads $agpr0..$agpr3 as its third source operand (overlap in $agpr2..$agpr3).
# Both runs expect S_NOP 2.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
body: |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_4X4X4I8_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# The first MFMA defines $agpr0..$agpr3; the second reads $agpr1 as its first
# source operand (the srca case named by the test) and also reads the full
# $agpr0..$agpr3 tuple as its third source operand. Both runs expect S_NOP 3.
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: nonxdl_sgemm4x4_mfma_write_agpr_mfma_srca_read_overlap
body: |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $vgpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X1F32_e64 $agpr1, $vgpr0, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 defines $vgpr0..$vgpr3 and a buffer store then uses $vgpr0.
# Both runs expect S_NOP 3 before the store.
# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: BUFFER_STORE_DWORD
name: nonxdl_smfma4x4_write_vgpr_vm_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# Same producer; the consumer is a V_MOV_B32 that reads $vgpr0.
# Both runs expect S_NOP 3 before the move.
# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma4x4_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr0, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# V_MFMA_F32_4X4X1F32 defines $vgpr0..$vgpr3 and a V_MOV_B32 then overwrites
# $vgpr1 (write after write). Both runs expect S_NOP 3 before the move.
# GCN-LABEL: name: nonxdl_smfma4x4_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 3
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma4x4_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr0, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 defines $vgpr0..$vgpr15 and a buffer store then uses
# $vgpr0. Both runs expect S_NOP 7 followed by S_NOP 1 before the store.
# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: BUFFER_STORE_DWORD
name: nonxdl_8pass_smfma16x16_write_vgpr_vm_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# Same producer; the consumer is a V_MOV_B32 that reads $vgpr0.
# Both runs expect S_NOP 7 followed by S_NOP 1 before the move.
# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_8pass_smfma16x16_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# V_MFMA_F32_16X16X1F32 defines $vgpr0..$vgpr15 and a V_MOV_B32 then overwrites
# $vgpr1. Both runs expect S_NOP 7 followed by S_NOP 1 before the move.
# GCN-LABEL: name: nonxdl_8pass_smfma16x16_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_8pass_smfma16x16_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr26, $vgpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X1F32 defines $vgpr0..$vgpr31 and a buffer store then uses
# $vgpr0. Both runs expect three S_NOPs (7, 7, 1) before the store.
# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_vm_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: BUFFER_STORE_DWORD
name: nonxdl_smfma32x32_write_vgpr_vm_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_F32_32X32X1F32 defines $vgpr0..$vgpr31 and a V_MOV_B32 then reads $vgpr0.
# Both runs expect three S_NOPs (7, 7, 1) before the move.
# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_valu_read
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma32x32_write_vgpr_valu_read
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr4 = V_MOV_B32_e32 $vgpr0, implicit $mode, implicit $exec
...
# Same producer; the V_MOV_B32 overwrites $vgpr1 instead of reading the result.
# Both runs expect three S_NOPs (7, 7, 1) before the move.
# GCN-LABEL: name: nonxdl_smfma32x32_write_vgpr_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MOV_B32
name: nonxdl_smfma32x32_write_vgpr_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_16X16X32I8 defines $agpr2..$agpr5; the following
# V_MFMA_F32_16X16X8XF32 reads $agpr0..$agpr3 as its third source operand
# (overlap in $agpr2..$agpr3). gfx940 expects S_NOP 4, gfx950 expects S_NOP 5.
# GCN-LABEL: name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: xdl_sgemm16x16_4pass_mfma_write_agpr_mfma_read_overlap
body: |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X8XF32_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# Same consumer, but the producer of $agpr2..$agpr5 is V_SMFMAC_F32_16X16X32_F16.
# gfx940 expects S_NOP 4, gfx950 expects S_NOP 5.
# GCN-LABEL: name: smfmac16x16_mfma_write_agpr_mfma_read_overlap
# GCN: V_SMFMAC
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: smfmac16x16_mfma_write_agpr_mfma_read_overlap
body: |
  bb.0:
    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_16X16X8XF32_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
...
# V_MFMA_I32_4X4X4I8 reads $vgpr0..$vgpr3 as its third source operand (result goes
# to $vgpr10..$vgpr13); a V_MOV_B32 then overwrites $vgpr1 (write after read).
# Both runs expect S_NOP 0 before the move.
# GCN-LABEL: name: xdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 0
# GCN-NEXT: V_MOV_B32
name: xdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_I32_4X4X4I8_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# Same register pattern with V_MFMA_F32_4X4X1F32 as the reader; the checks require
# the move to follow immediately (no S_NOP).
# GCN-LABEL: name: nonxdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_sgemm4x4_mfma_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr4, $vgpr5, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# V_MFMA_F32_16X16X8XF32 reads $vgpr0..$vgpr3 as its third source operand (result
# goes to $vgpr10..$vgpr13); a V_MOV_B32 then overwrites $vgpr1.
# Both runs expect S_NOP 2 before the move.
# GCN-LABEL: name: xdl_4pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: xdl_4pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr10_vgpr11_vgpr12_vgpr13 = V_MFMA_F32_16X16X8XF32_vgprcd_e64 $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# V_SMFMAC_F32_16X16X32_F16 uses $vgpr0..$vgpr3 as both result and trailing
# register operand, so the later V_MOV_B32 to $vgpr1 touches a register the
# SMFMAC both reads and writes. Both runs expect S_NOP 6 before the move.
# GCN-LABEL: name: smfmac16x16_read_vgpr_srcc_valu_write
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: smfmac16x16_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# V_MFMA_I32_16X16X4I8 reads $vgpr0..$vgpr15 as its third source operand (result
# goes to $vgpr100..$vgpr115); a V_MOV_B32 then overwrites $vgpr1.
# Both runs expect S_NOP 6 before the move.
# GCN-LABEL: name: xdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: xdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115 = V_MFMA_I32_16X16X4I8_vgprcd_e64 $agpr26, $agpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# V_MFMA_F32_16X16X1F32 reads $vgpr0..$vgpr15 as its third source operand (result
# goes to $vgpr100..$vgpr115); a V_MOV_B32 then overwrites $vgpr1. The checks
# require the move to follow immediately (no S_NOP).
# GCN-LABEL: name: nonxdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_8pass_sgemm16x16_mfma_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $agpr26, $agpr27, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# V_SMFMAC_I32_32X32X32_I8 uses $vgpr0..$vgpr15 as both result and trailing
# register operand; a V_MOV_B32 then overwrites $vgpr1.
# Both runs expect S_NOP 7 followed by S_NOP 2 before the move.
# GCN-LABEL: name: smfmac32x32_read_vgpr_srcc_valu_write
# GCN: V_SMFMAC
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MOV_B32
name: smfmac32x32_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_SMFMAC_I32_32X32X32_I8_e64 $agpr0_agpr1, $agpr2_agpr3_agpr4_agpr5, $vgpr2, 0, 0, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# V_MFMA_F32_32X32X4F16 reads $vgpr0..$vgpr31 as its third source operand (result
# goes to $vgpr40..$vgpr71); a V_MOV_B32 then overwrites $vgpr1.
# Both runs expect S_NOP 7 followed by S_NOP 6 before the move.
# GCN-LABEL: name: xdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MOV_B32
name: xdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $agpr126_agpr127, $agpr128_agpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# Same register pattern with V_MFMA_F32_32X32X1F32 as the reader; the checks
# require the move to follow immediately (no S_NOP).
# GCN-LABEL: name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
# GCN: V_MFMA
# GCN-NEXT: V_MOV_B32
name: nonxdl_sgemm32x32_mfma_read_vgpr_srcc_valu_write
body: |
  bb.0:
    $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = V_MFMA_F32_32X32X1F32_vgprcd_e64 $agpr26, $agpr28, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr1 = V_MOV_B32_e32 0, implicit $exec
...
# A V_MOV_B32 writes $vgpr0, an unrelated V_MFMA_F64_4X4X4F64 on $agpr0_agpr1 sits
# in between, and a buffer store then uses $vgpr0. The checks require that no
# S_NOP is emitted and that the store directly follows the MFMA.
# NOTE(review): the -NOT check sits between two -NEXT checks, so it looks
# redundant with them - confirm before relying on it.
# GCN-LABEL: name: dgemm_between_valu_write_buffer_store_no_snop
# GCN: V_MOV_B32_e32
# GCN-NEXT: V_MFMA_F64
# GCN-NOT: S_NOP
# GCN-NEXT: BUFFER_STORE_DWORD
name: dgemm_between_valu_write_buffer_store_no_snop
body: |
  bb.0:
    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
    $agpr0_agpr1 = V_MFMA_F64_4X4X4F64_e64 $agpr0_agpr1, $agpr0_agpr1, $agpr0_agpr1, 0, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORDX2_OFFEN_exact $vgpr2_vgpr3, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
...

...
# 2 pass source
# V_MFMA_F32_4X4X4F16 defines $vgpr0..$vgpr3; a second V_MFMA_F32_4X4X4F16 reads
# $vgpr2..$vgpr5 as its third source operand (the srcc case named by the test).
# gfx940 expects S_NOP 2, gfx950 expects S_NOP 3.
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 2
# GFX950-NEXT: S_NOP 3
# GCN-NEXT: V_MFMA
name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcc
body: |
  bb.0:

    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr2_vgpr3_vgpr4_vgpr5, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 2 pass source
# Same producer; the second MFMA reads $vgpr0_vgpr1 as its first source operand
# (the srca case) and its third source operand $vgpr8..$vgpr11 does not overlap.
# Both runs expect S_NOP 4.
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srca
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srca
body: |
  bb.0:

    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr0_vgpr1, $vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 2 pass source
# V_MFMA_F32_4X4X4F16 defines $vgpr0..$vgpr3; the second MFMA reads $vgpr2_vgpr3
# as its second source operand (the srcb case named by the test).
# Both runs expect S_NOP 4.
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcb
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_mfma_2pass_write_vgpr_xdl_mfma_read_overlap_srcb
body: |
  bb.0:

    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr6_vgpr7, $vgpr2_vgpr3, $vgpr8_vgpr9_vgpr10_vgpr11, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 4 pass source
# V_MFMA_F32_16X16X16F16 defines $vgpr0..$vgpr3; a second V_MFMA_F32_16X16X16F16
# reads $vgpr2..$vgpr5 as its third source operand (the srcc case).
# gfx940 expects S_NOP 4, gfx950 expects S_NOP 5.
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcc
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr2_vgpr3_vgpr4_vgpr5, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 4 pass source
# Same producer; the second MFMA reads $vgpr2_vgpr3 as its first source operand
# (the srca case). Both runs expect S_NOP 6.
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srca
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr10_vgpr11, $vgpr6_vgpr7_vgpr8_vgpr9, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 4 pass source
# V_MFMA_F32_16X16X16F16 defines $vgpr0..$vgpr3; the second MFMA reads
# $vgpr2_vgpr3 as its second source operand (the srcb case named by the test).
# Both runs expect S_NOP 6.
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name: xdl_mfma_4pass_write_vgpr_xdl_mfma_read_overlap_srcb
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr10_vgpr11, $vgpr2_vgpr3, $vgpr6_vgpr7_vgpr8_vgpr9, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 2 pass source
# V_MFMA_F32_4X4X4F16 defines $vgpr0..$vgpr3; the following V_MFMA_F32_4X4X1F32
# reads $vgpr2..$vgpr5 as its third source operand (the srcc case).
# Both runs expect S_NOP 2.
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
body: |
  bb.0:

    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr6, $vgpr8, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec

...

...
# 2 pass source
# Same producer; the V_MFMA_F32_4X4X1F32 reads $vgpr1 as its first source operand
# (the srca case). Both runs expect S_NOP 4.
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srca
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srca
body: |
  bb.0:

    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr8, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec

...

...
# 2 pass source
# V_MFMA_F32_4X4X4F16 defines $vgpr0..$vgpr3; the following V_MFMA_F32_4X4X1F32
# reads $vgpr1 as its second source operand (the srcb case named by the test).
# Both runs expect S_NOP 4.
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcb
# GCN: V_MFMA
# GCN-NEXT: S_NOP 4
# GCN-NEXT: V_MFMA
name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcb
body: |
  bb.0:

    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X4F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr1, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec

...

...
# 4 pass source
# V_MFMA_F32_16X16X16F16 defines $vgpr0..$vgpr3; the following V_MFMA_F32_4X4X1F32
# reads $vgpr2..$vgpr5 as its third source operand (the srcc case).
# gfx940 expects S_NOP 4, gfx950 expects S_NOP 5.
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc
# GCN: V_MFMA
# GFX940-NEXT: S_NOP 4
# GFX950-NEXT: S_NOP 5
# GCN-NEXT: V_MFMA
name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcc
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr9, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec

...

...
# 4 pass source
# Same producer; the V_MFMA_F32_4X4X1F32 reads $vgpr1 as its first source operand
# (the srca case) and its third source operand $vgpr6..$vgpr9 does not overlap.
# Both runs expect S_NOP 6.
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srca
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr1, $vgpr8, $vgpr6_vgpr7_vgpr8_vgpr9, 0, 0, 0, implicit $mode, implicit $exec

...

...
# 4 pass source
# V_MFMA_F32_16X16X16F16 defines $vgpr0..$vgpr3; the following V_MFMA_F32_4X4X1F32
# reads $vgpr1 as its second source operand (the srcb case named by the test).
# Both runs expect S_NOP 6.
# NOTE(review): unlike the sibling srca test, the third source operand here
# ($vgpr2..$vgpr5) also overlaps the first MFMA's result - confirm whether that
# extra overlap is intended or whether $vgpr6..$vgpr9 was meant.
# GCN-LABEL: name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb
# GCN: V_MFMA
# GCN-NEXT: S_NOP 6
# GCN-NEXT: V_MFMA
name: xdl_mfma_4pass_write_vgpr_sgemm_mfma_read_overlap_srcb
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_16X16X16F16_vgprcd_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, 1, 2, 3, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3 = V_MFMA_F32_4X4X1F32_vgprcd_e64 $vgpr8, $vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit $mode, implicit $exec

...

...
# 8 pass source
# V_MFMA_F32_32X32X8F16 defines $vgpr0..$vgpr15 (its third source operand is the
# immediate 1065353216); the following V_MFMA_F32_16X16X1F32 reads
# $vgpr2..$vgpr17 as its third source operand (the srcc case).
# Expected: S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
body: |
  bb.0:
    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec

    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 8 pass source
# V_MFMA_F32_32X32X8F16 defines $vgpr0..$vgpr15; the following
# V_MFMA_F32_16X16X1F32 reads $vgpr0 as its first source operand (the srca case)
# while its third source operand $vgpr18..$vgpr33 does not overlap.
# Both runs expect S_NOP 7 followed by S_NOP 2.
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
body: |
  bb.0:
    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec

    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr0, $vgpr33, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 8 pass source
# Same producer; the consumer reads $vgpr1 as its second source operand (the srcb
# case). Both runs expect S_NOP 7 followed by S_NOP 2.
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_mfma_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
body: |
  bb.0:
    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec

    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr33, $vgpr1, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 16 pass source
# V_MFMA_F32_32X32X4F16 defines $vgpr0..$vgpr31; the following
# V_MFMA_F32_32X32X2F32 reads $vgpr16..$vgpr31 as its third source operand (the
# srcc case named by the test).
# Expected: S_NOP 7, S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec

    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr32, killed $vgpr33, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 16 pass source
# V_MFMA_F32_32X32X4F16 defines $vgpr0..$vgpr31; the following
# V_MFMA_F32_32X32X2F32 reads $vgpr0 as its first source operand (the srca case)
# while its third source operand $vgpr32..$vgpr47 does not overlap.
# Both runs expect three S_NOPs (7, 7, 2).
# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr0, killed $vgpr33, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 16 pass source
# V_MFMA_F32_32X32X4F16 defines $vgpr0..$vgpr31; the following
# V_MFMA_F32_32X32X2F32 reads $vgpr0 as its second source operand (the srcb case)
# while its third source operand $vgpr32..$vgpr47 does not overlap.
# Both runs expect three S_NOPs (7, 7, 2).
# GCN-LABEL: name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 2
# GCN-NEXT: V_MFMA
name: xdl_16pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X2F32_vgprcd_e64 killed $vgpr33, killed $vgpr0, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, 1, 2, 3, implicit $mode, implicit $exec

...

...
# 8 pass source
# V_MFMA_F32_16X16X1F32 defines $vgpr2..$vgpr17; an identical second MFMA reads
# $vgpr0..$vgpr15 as its third source operand (the srcc case; overlap in
# $vgpr2..$vgpr15). Both runs expect a single S_NOP 7.
# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: V_MFMA
name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcc
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 8 pass source
# Same producer; the second MFMA reads $vgpr3 as its first source operand (the
# srca case) while its third source operand $vgpr18..$vgpr33 does not overlap.
# Both runs expect S_NOP 7 followed by S_NOP 1.
# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srca
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr3, $vgpr19, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 8 pass source
# V_MFMA_F32_16X16X1F32 defines $vgpr2..$vgpr17; the second MFMA reads $vgpr3 as
# its second source operand (the srcb case) while its third source operand
# $vgpr18..$vgpr33 does not overlap.
# Both runs expect S_NOP 7 followed by S_NOP 1.
# GCN-LABEL: name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GCN-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: nonxdl_8pass_write_vgpr_nonxdl_sgemm_mfma_read_overlap_srcb
body: |
  bb.0:
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr18, $vgpr19, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = V_MFMA_F32_16X16X1F32_vgprcd_e64 $vgpr19, $vgpr3, $vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
...
...
# 8 pass source
# V_MFMA_F32_32X32X8F16 defines $vgpr0..$vgpr15; a second V_MFMA_F32_32X32X8F16
# reads $vgpr2..$vgpr17 as its third source operand (the srcc case).
# Expected: S_NOP 7, then S_NOP 0 on gfx940 or S_NOP 1 on gfx950.
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc
# GCN: V_MFMA
# GCN-NEXT: S_NOP 7
# GFX940-NEXT: S_NOP 0
# GFX950-NEXT: S_NOP 1
# GCN-NEXT: V_MFMA
name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcc
body: |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr18_vgpr19, killed $vgpr20_vgpr21, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 8 pass source, src A overlap (32X32X8F16 MFMA pair).
# The first MFMA writes $vgpr0..$vgpr15. The second MFMA reads $vgpr2_vgpr3 as
# src A, which lies inside that written range; its src B ($vgpr36_vgpr37) and
# src C ($vgpr16..$vgpr31) are outside it.
# Expected on both run lines: S_NOP 7 followed by S_NOP 2.
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca
# GCN:          V_MFMA
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 2
# GCN-NEXT:     V_MFMA
name:            xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srca
body:             |
  bb.0:
    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr2_vgpr3, killed $vgpr36_vgpr37, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 8 pass source, src B overlap (32X32X8F16 MFMA pair).
# Same producer as the src A case; here the second MFMA reads $vgpr2_vgpr3 as
# src B instead, with src A ($vgpr36_vgpr37) and src C ($vgpr16..$vgpr31)
# outside the written $vgpr0..$vgpr15 range.
# Expected on both run lines: S_NOP 7 followed by S_NOP 2.
# GCN-LABEL: name: xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb
# GCN:          V_MFMA
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 2
# GCN-NEXT:     V_MFMA
name:            xdl_mfma_8pass_write_vgpr_xdl_mfma_read_overlap_srcb
body:             |
  bb.0:
    renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr36_vgpr37, killed $vgpr2_vgpr3, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 16 pass source, src C overlap (32X32X4F16 MFMA pair).
# The first MFMA writes $vgpr0..$vgpr31. The second MFMA reads $vgpr2..$vgpr33
# as src C, so $vgpr2..$vgpr31 are shared between the write and the read.
# Expected: two S_NOP 7, then a third S_NOP whose operand differs per target
# (0 on the gfx940 run line, 1 on the gfx950 run line).
# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc
# GCN:          V_MFMA
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 7
# GFX940-NEXT:  S_NOP 0
# GFX950-NEXT:  S_NOP 1
# GCN-NEXT:     V_MFMA
name:            xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcc
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 16 pass source, src A overlap (32X32X4F16 MFMA pair).
# The first MFMA writes $vgpr0..$vgpr31. The second MFMA reads $vgpr2_vgpr3 as
# src A, which lies inside that written range; its src B ($vgpr128_vgpr129)
# and src C ($vgpr32..$vgpr63) are outside it.
# Expected on both run lines: two S_NOP 7 followed by S_NOP 2.
# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca
# GCN:          V_MFMA
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 2
# GCN-NEXT:     V_MFMA
name:            xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srca
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr128_vgpr129, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 16 pass source, src B overlap (32X32X4F16 MFMA pair).
# The first MFMA writes $vgpr0..$vgpr31. The second MFMA reads $vgpr2_vgpr3 as
# src B, which lies inside that written range; its src A ($vgpr128_vgpr129)
# and src C ($vgpr32..$vgpr63) are outside it.
# Expected on both run lines: two S_NOP 7 followed by S_NOP 2.
# GCN-LABEL: name: xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb
# GCN:          V_MFMA
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 2
# GCN-NEXT:     V_MFMA
name:            xdl_16pass_write_vgpr_xdl_mfma_read_overlap_srcb
body:             |
  bb.0:
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, 0, 0, 0, implicit $mode, implicit $exec
    $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = V_MFMA_F32_32X32X4F16_vgprcd_e64 $vgpr128_vgpr129, $vgpr2_vgpr3, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, 0, 0, 0, implicit $mode, implicit $exec
...

...
# 2 pass source feeding an SMFMAC through src C.
# The MFMA writes $agpr0..$agpr3. The SMFMAC that follows names
# $agpr2..$agpr5 both as its result and as its trailing register operand
# (src C per the test name), so $agpr2..$agpr3 come from the MFMA.
# Expected: a single S_NOP whose operand differs per target
# (2 on the gfx940 run line, 3 on the gfx950 run line).
# GCN-LABEL: name: xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc
# GCN:          V_MFMA
# GFX940-NEXT:  S_NOP 2
# GFX950-NEXT:  S_NOP 3
# GCN-NEXT:     V_SMFMAC_
name:            xdl_mfma_2pass_write_agpr_smfmac_read_overlap_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_F32_4X4X4F16_e64 $vgpr4_vgpr5, $vgpr6_vgpr7, $agpr0_agpr1_agpr2_agpr3, 1, 2, 3, implicit $mode, implicit $exec
    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
...

...
# 4 pass source (per the test name) feeding an SMFMAC through src C.
# The MFMA writes $agpr0..$agpr3; the SMFMAC's trailing register operand
# $agpr2..$agpr5 overlaps it in $agpr2..$agpr3.
# Expected: a single S_NOP whose operand differs per target
# (4 on the gfx940 run line, 5 on the gfx950 run line).
# GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
# GCN:          V_MFMA
# GFX940-NEXT:  S_NOP 4
# GFX950-NEXT:  S_NOP 5
# GCN-NEXT:     V_SMFMAC_
name:            xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_16X16X32I8_e64 $vgpr0_vgpr1, $vgpr2_vgpr3, $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
...

...
# 8 pass source (per the test name) feeding an SMFMAC through src C.
# The MFMA writes $agpr0..$agpr15; the SMFMAC's trailing register operand
# $agpr2..$agpr5 lies entirely inside that range.
# Expected: S_NOP 7, then a second S_NOP whose operand differs per target
# (0 on the gfx940 run line, 1 on the gfx950 run line).
# GCN-LABEL: name: xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc
# GCN:          V_MFMA
# GCN-NEXT:     S_NOP 7
# GFX940-NEXT:  S_NOP 0
# GFX950-NEXT:  S_NOP 1
# GCN-NEXT:     V_SMFMAC_
name:            xdl_8pass_mfma_write_agpr_smfmac_read_overlap_srcc
body:             |
  bb.0:
    renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = V_MFMA_F32_32X32X8F16_e64 killed $vgpr0_vgpr1, killed $vgpr2_vgpr3, 1065353216, 0, 0, 0, implicit $mode, implicit $exec
    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
...

...
# 16 pass source (per the test name) feeding an SMFMAC through src C.
# The MFMA writes $agpr0..$agpr31; the SMFMAC's trailing register operand
# $agpr2..$agpr5 lies entirely inside that range.
# Expected: two S_NOP 7, then a third S_NOP whose operand differs per target
# (0 on the gfx940 run line, 1 on the gfx950 run line).
# GCN-LABEL: name: xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc
# GCN:          V_MFMA
# GCN-NEXT:     S_NOP 7
# GCN-NEXT:     S_NOP 7
# GFX940-NEXT:  S_NOP 0
# GFX950-NEXT:  S_NOP 1
# GCN-NEXT:     V_SMFMAC_
name:            xdl_16pass_mfma_write_agpr_smfmac_read_overlap_srcc
body:             |
  bb.0:
    $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X4F16_e64 $vgpr126_vgpr127, $vgpr128_vgpr129, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, 0, 0, 0, implicit $mode, implicit $exec
    $agpr2_agpr3_agpr4_agpr5 = V_SMFMAC_F32_16X16X32_F16_e64 $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5, $vgpr32, 0, 0, $agpr2_agpr3_agpr4_agpr5, implicit $mode, implicit $exec
...