1# RUN: llvm-mca -mtriple=amdgcn -mcpu=gfx950 --timeline --iterations=1 --timeline-max-cycles=0 < %s | FileCheck %s 2 3# CHECK: Iterations: 1 4# CHECK: Instructions: 133 5# CHECK: Total Cycles: 1101 6# CHECK: Total uOps: 133 7 8v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 9v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] 10v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15] 11v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 12v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] 13v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 14v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 15v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] 16v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] 17v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 18v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 19v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7] 20 21v_mfma_ld_scale_b32 v0, v0 22 23;; FIXME: should have different cycle count depending on whether either matrix is f8 24;; TODO: test vdc/adc 25v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] 26v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1 27v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:2 28v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:3 29v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:7], v[0:3] blgp:4 30v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 31v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 32v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:3 33v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:7], v[4:11], v[0:3] cbsz:4 34v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 blgp:1 35v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] cbsz:1 blgp:2 36 37;; FIXME: should have different cycle count depending on whether either matrix is f8 38v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] 39v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 40v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:2 41v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:3 42v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:7], v[0:15] blgp:4 43v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1 44v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2 45v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:3 46v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:7], v[4:11], v[0:15] cbsz:4 47v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2 48v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 49 50;; FIXME: should have different cycle count depending on whether either matrix is f8 51v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 52v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 blgp:1 53v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:9], v[0:3], v5, v5 cbsz:2 blgp:2 54 55v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 56v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15], v5, v5 cbsz:2 blgp:1 57v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:9], v[0:15], v5, v5 cbsz:2 blgp:2 58 59;; TODO: These results are wrong 60v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 61v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 62v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 63v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 64v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 65v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 66 67v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 68v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 69v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 70v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 71 72v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 73v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 74v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 75v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 76 77v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] 78v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] 79 80v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] 81v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] 82 83v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] 84v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] 85 86v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] 87v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] 88 89v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] 90v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] 91 92v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] 93v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] 94 95v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] 96v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] 97 98v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] 99v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] 100 101v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] 102v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] 103 104v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] 105v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] 106 107v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] 108v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] 109 110v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] 111v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] 112 113v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] 114v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] 115 116v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] 117v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] 118 119v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] 120v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] 121 122v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] 123v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] 124 125v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] 126v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] 127 128v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] 129v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] 130 131v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] 132v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] 133 134v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 135v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 136 137v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] 138v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] 139 140v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] 141v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] 142 143v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] 144v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] 145 146v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] 147v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] 148 149v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 150v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 151 152v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 153v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 154 155v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 156v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 157 158v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 159v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 160 161v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 162v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 163 164v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] 165v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] 166 167v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] 168v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] 169 170v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] 171v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] 172 173v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] 174v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] 175 176v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] 177v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] 178v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] 179v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] 180 181v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 182v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 183v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 184v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 185 186v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 187v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 188v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 189v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 190 191# CHECK: [0] [1] [2] [3] [4] [5] [6] Instructions: 192# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_f16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 193# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[0:3], a[4:7] 194# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 v[0:15], v[0:3], v[0:3], v[0:15] 195# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_f16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 196# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 v[0:15], v[0:3], v[0:3], v[0:15] 197# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf16 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 198# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 199# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[0:3], a[4:7] 200# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 v[0:15], v[0:3], v[0:3], v[0:15] 201# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x32_i8 a[0:15], a[0:3], a[0:3], a[0:15] blgp:2 202# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], a[0:3], a[0:3], a[0:3] blgp:1 203# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[0:3], a[4:7] 204# CHECK-NEXT: - - - - 1.00 - - v_mfma_ld_scale_b32 v0, v0 205# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] 206# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] blgp:1 207# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:2 208# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] blgp:3 209# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:7], v[0:3] blgp:4 210# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3] cbsz:1 211# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 212# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:3 213# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:7], v[4:11], v[0:3] cbsz:4 214# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:11], v[0:3] cbsz:2 blgp:1 215# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:9], v[0:3] cbsz:1 blgp:2 216# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] 217# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 218# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:2 219# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:9], v[0:15] blgp:3 220# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:7], v[0:15] blgp:4 221# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] cbsz:1 222# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2 223# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:3 224# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:7], v[4:11], v[0:15] cbsz:4 225# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15] cbsz:2 226# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15] blgp:1 227# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0] 228# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:11], v[4:11], v[0:3], v5, v5 op_sel_hi:[0,0,0] blgp:1 229# CHECK-NEXT: - - - - - - 4.00 v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[4:9], v[4:9], v[0:3], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:2 230# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:11], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0] 231# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:11], v[0:15], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:1 232# CHECK-NEXT: - - - - - - 8.00 v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[4:9], v[4:9], v[0:15], v5, v5 op_sel_hi:[0,0,0] cbsz:2 blgp:2 233# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_f16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 234# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_f16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 235# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf16 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 236# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf16 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 237# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x128_i8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 238# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x64_i8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 239# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 240# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 241# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 242# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], a[2:5], v[4:11], v3 cbsz:3 abid:1 243# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 244# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_bf8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 245# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_bf8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 246# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x64_fp8_fp8 v[10:25], a[2:5], v[4:11], v3 cbsz:3 abid:1 247# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] 248# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] 249# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] 250# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] 251# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 a[0:1], v[0:1], a[2:3], a[2:3] 252# CHECK-NEXT: - - - - 4.00 - - v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1], v[2:3], v[2:3] 253# CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 a[0:7], v[0:1], v[2:3], a[0:7] 254# CHECK-NEXT: - - - - 16.00 - - v_mfma_f64_16x16x4_f64 v[0:7], v[0:1], v[2:3], v[0:7] 255# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 v[0:3], v[4:5], v[6:7], v[0:3] 256# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_f16 a[0:3], v[4:5], v[6:7], a[0:3] 257# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 v[0:15], v[4:5], v[6:7], v[0:15] 258# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_f16 a[0:15], v[4:5], v[6:7], a[0:15] 259# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 v[0:3], v[4:5], v[6:7], v[0:3] 260# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x16_bf16 a[0:3], v[4:5], v[6:7], a[0:3] 261# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 v[0:15], v[4:5], v[6:7], v[0:15] 262# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x8_bf16 a[0:15], v[4:5], v[6:7], a[0:15] 263# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 v[0:3], v[4:5], v[6:7], v[0:3] 264# CHECK-NEXT: - - - - - - 4.00 v_mfma_i32_16x16x32_i8 a[0:3], v[4:5], v[6:7], a[0:3] 265# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 v[0:15], v[2:3], v[4:5], v[0:15] 266# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_32x32x16_i8 a[0:15], v[2:3], v[4:5], a[0:15] 267# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 v[0:3], v[0:1], v[2:3], v[2:5] 268# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_f16 a[0:3], v[0:1], v[2:3], a[2:5] 269# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 v[0:15], v[2:3], v[4:5], v[18:33] 270# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_f16 a[0:15], v[2:3], v[4:5], a[18:33] 271# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 v[0:31], v[0:1], v[2:3], v[34:65] 272# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_f16 a[0:31], v[0:1], v[2:3], a[34:65] 273# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 v[0:3], v[0:1], v[2:3], v[2:5] 274# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x4_16b_bf16 a[0:3], v[0:1], v[2:3], a[2:5] 275# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 v[0:15], v[2:3], v[4:5], v[18:33] 276# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_4b_bf16 a[0:15], v[2:3], v[4:5], a[18:33] 277# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 v[0:31], v[0:1], v[2:3], v[34:65] 278# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x4_2b_bf16 a[0:31], v[0:1], v[2:3], a[34:65] 279# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 v[0:3], v0, v1, v[2:5] 280# CHECK-NEXT: - - - - - - 2.00 v_mfma_f32_4x4x1_16b_f32 a[0:3], v0, v1, a[2:5] 281# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, v[18:33] 282# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x1_4b_f32 a[0:15], v0, v1, a[18:33] 283# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 v[0:3], v0, v1, v[2:5] 284# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_16x16x4_f32 a[0:3], v0, v1, a[2:5] 285# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, v[34:65] blgp:7 286# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x1_2b_f32 a[0:31], v0, v1, a[34:65] blgp:7 287# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 v[0:15], v0, v1, v[18:33] 288# CHECK-NEXT: - - - - - - 16.00 v_mfma_f32_32x32x2_f32 a[0:15], v0, v1, a[18:33] 289# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 v[0:3], v0, v1, v[2:5] 290# CHECK-NEXT: - - - - - - 2.00 v_mfma_i32_4x4x4_16b_i8 a[0:3], v0, v1, a[2:5] 291# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, v[18:33] 292# CHECK-NEXT: - - - - - - 8.00 v_mfma_i32_16x16x4_4b_i8 a[0:15], v0, v1, a[18:33] 293# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 v[0:31], v0, v1, v[34:65] 294# CHECK-NEXT: - - - - - - 16.00 v_mfma_i32_32x32x4_2b_i8 a[0:31], v0, v1, a[34:65] 295# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 v[10:13], a[2:3], v[4:7], v0 cbsz:3 abid:1 296# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_f16 a[10:13], v[2:3], a[4:7], v1 297# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 v[10:25], a[2:3], v[4:7], v2 cbsz:3 abid:1 298# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x16_f16 a[10:25], v[2:3], a[4:7], v3 299# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 v[10:13], a[2:3], v[4:7], v4 cbsz:3 abid:1 300# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x32_bf16 a[10:13], v[2:3], a[4:7], v5 301# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 v[10:13], a[2:3], v[4:7], v8 cbsz:3 abid:1 302# CHECK-NEXT: - - - - - - 4.00 v_smfmac_i32_16x16x64_i8 a[10:13], v[2:3], a[4:7], v9 303# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 v[10:25], a[2:3], v[4:7], v10 cbsz:3 abid:1 304# CHECK-NEXT: - - - - - - 8.00 v_smfmac_i32_32x32x32_i8 a[10:25], v[2:3], a[4:7], v11 305# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] 306# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] 307# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] 308# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_bf8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] 309# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 v[0:3], v[2:3], v[4:5], v[0:3] 310# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_bf8 a[0:3], v[2:3], v[4:5], a[0:3] 311# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 v[0:3], v[2:3], v[4:5], v[0:3] 312# CHECK-NEXT: - - - - - - 4.00 v_mfma_f32_16x16x32_fp8_fp8 a[0:3], v[2:3], v[4:5], a[0:3] 313# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] 314# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_bf8 v[0:15], v[2:3], v[4:5], v[0:15] 315# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_bf8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] 316# CHECK-NEXT: - - - - - - 8.00 v_mfma_f32_32x32x16_fp8_fp8 v[0:15], v[2:3], v[4:5], v[0:15] 317# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 318# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_bf8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 319# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_bf8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 320# CHECK-NEXT: - - - - - - 4.00 v_smfmac_f32_16x16x64_fp8_fp8 v[0:3], a[2:3], v[4:7], v1 cbsz:3 abid:1 321# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 322# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_bf8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 323# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_bf8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 324# CHECK-NEXT: - - - - - - 8.00 v_smfmac_f32_32x32x32_fp8_fp8 v[0:15], v[2:3], v[4:7], v1 cbsz:3 abid:1 325