// RUN: mlir-opt %s -split-input-file -verify-diagnostics

// Negative tests for AMDGPU dialect op verification. Each case below is
// processed on its own (see -split-input-file in the RUN line) and holds one
// op annotated with the diagnostic it is expected to produce. Every
// expected-error annotation uses a relative offset of +1, so it must stay on
// the line directly above the op it refers to.

// The `into` operand is vector<4xf8E5M2FNUZ> while the declared result is
// vector<4xf8E4M3FNUZ>.
func.func @mixing_packed_trunc_types(%arg0: f32, %arg1: vector<4xf8E5M2FNUZ>) -> vector<4xf8E4M3FNUZ> {
  // expected-error@+1 {{'amdgpu.packed_trunc_2xfp8' op existing values must have same type as result}}
  %ret = amdgpu.packed_trunc_2xfp8 %arg0, undef into %arg1[word 0] : f32 to vector<4xf8E4M3FNUZ> into vector<4xf8E5M2FNUZ>
  func.return %ret : vector<4xf8E4M3FNUZ>
}

// -----

// Same type mismatch as above, for the stochastic-rounding op: the `into`
// operand is vector<4xf8E5M2FNUZ>, the result is vector<4xf8E4M3FNUZ>.
func.func @mixing_packed_stoch_round_types(%arg0: f32, %arg1: i32, %arg2: vector<4xf8E5M2FNUZ>) -> vector<4xf8E4M3FNUZ> {
  // expected-error@+1 {{'amdgpu.packed_stoch_round_fp8' op existing values must have same type as result}}
  %ret = amdgpu.packed_stoch_round_fp8 %arg0 + %arg1 into %arg2[0] : f32 to vector<4xf8E4M3FNUZ> into vector<4xf8E5M2FNUZ>
  func.return %ret : vector<4xf8E4M3FNUZ>
}

// -----

// Source A is vector<2xf32> and source B is vector<4xf16>; neither is an f8
// type and the two types differ.
func.func @bad_source_types(%a: vector<2xf32>, %b: vector<4xf16>,
                            %c: vector<32xf32>) -> vector<32xf32> {
  // expected-error@+1 {{'amdgpu.mfma' op expected both non-f8 source operand types to match exactly}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32,
    abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<2xf32>, vector<4xf16>, vector<32xf32>
  func.return %d : vector<32xf32>
}

// -----

// Source A has f8 elements (f8E5M2FNUZ) but source B has i8 elements.
func.func @bad_source_types_f8(%a: vector<8xf8E5M2FNUZ>, %b: vector<8xi8>,
                               %c: vector<32xf32>) -> vector<32xf32> {
  // expected-error@+1 {{'amdgpu.mfma' op expected both source operands to have f8 elements}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32,
    abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<8xf8E5M2FNUZ>, vector<8xi8>, vector<32xf32>
  func.return %d : vector<32xf32>
}

// -----

// The sources are 2-element f32 vectors; per the expected diagnostic, the
// m/n/k/blocks attributes used here call for 1 source value.
func.func @bad_source_arguments(%a: vector<2xf32>, %b: vector<2xf32>,
                                %c: vector<32xf32>) -> vector<32xf32> {
  // expected-error@+1 {{'amdgpu.mfma' op expected 1 source values for this operation but got 2}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32,
    abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<2xf32>, vector<2xf32>, vector<32xf32>
  func.return %d : vector<32xf32>
}

// -----

// The sources are 8-element i8 vectors; per the expected diagnostic, the
// m/n/k/blocks attributes used here call for 4 source values.
func.func @bad_source_arguments_i8(%a: vector<8xi8>, %b: vector<8xi8>,
                                   %c: vector<4xi32>) -> vector<4xi32> {
  // expected-error@+1 {{'amdgpu.mfma' op expected 4 source values for this operation but got 8}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 4 : i32, blocks = 2 : i32,
    abid = 0 : i32, cbsz = 0 : i32} blgp = none : vector<8xi8>, vector<8xi8>, vector<4xi32>
  func.return %d : vector<4xi32>
}

// -----

// The accumulator/result is vector<16xf32>; per the expected diagnostic, the
// m/n/k/blocks attributes used here call for 32 result values.
func.func @bad_dest_type(%a: f32, %b: f32, %c: vector<16xf32>) -> vector<16xf32> {
  // expected-error@+1 {{'amdgpu.mfma' op expected 32 result values for this operation but got 16}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32,
    abid = 0 : i32, cbsz = 0 : i32} blgp = none : f32, f32, vector<16xf32>
  return %d : vector<16xf32>
}

// -----

// f64 operands combined with a B permutation (blgp = bcast_first_32).
func.func @f64_permuting_b(%a: f64, %b: f64, %c: vector<4xf64>) -> vector<4xf64> {
  // expected-error@+1 {{'amdgpu.mfma' op double-precision ops do not support permuting lanes of B}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 16 : i32, n = 16 : i32, k = 4 : i32, blocks = 1 : i32,
    abid = 0 : i32, cbsz = 0 : i32} blgp = bcast_first_32 : f64, f64, vector<4xf64>
  return %d : vector<4xf64>
}

// -----

// f64 operands combined with a non-zero cbsz (cbsz = 1), which the expected
// diagnostic reports as permuting lanes of A.
func.func @f64_permuting_a(%a: f64, %b: f64, %c: vector<4xf64>) -> vector<4xf64> {
  // expected-error@+1 {{'amdgpu.mfma' op double-precision ops do not support permuting lanes of A}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 16 : i32, n = 16 : i32, k = 4 : i32, blocks = 1 : i32,
    abid = 0 : i32, cbsz = 1 : i32} blgp = none : f64, f64, vector<4xf64>
  return %d : vector<4xf64>
}

// -----

// abid = 1 with cbsz = 0: the diagnostic requires abid < 2 ** cbsz, and
// 2 ** 0 = 1, so abid = 1 is out of range.
func.func @abid_without_bradcast(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> {
  // expected-error@+1 {{'amdgpu.mfma' op block ID for permuting A (abid) must be below 2 ** cbsz}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32,
    abid = 1 : i32, cbsz = 0 : i32} blgp = none : f32, f32, vector<32xf32>
  func.return %d : vector<32xf32>
}

// -----

// abid = 2 with cbsz = 1: the diagnostic requires abid < 2 ** cbsz, and
// 2 ** 1 = 2, so abid = 2 is out of range.
func.func @abid_too_large(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> {
  // expected-error@+1 {{'amdgpu.mfma' op block ID for permuting A (abid) must be below 2 ** cbsz}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32,
    abid = 2 : i32, cbsz = 1 : i32} blgp = none : f32, f32, vector<32xf32>
  func.return %d : vector<32xf32>
}

// -----

// The negateA flag is set on an op whose operands are f32, not f64.
func.func @no_negation(%a: f32, %b: f32, %c: vector<32xf32>) -> vector<32xf32> {
  // expected-error@+1 {{'amdgpu.mfma' op negation flags only available for double-precision operations}}
  %d = amdgpu.mfma %a * %b + %c {
    m = 32 : i32, n = 32 : i32, k = 1 : i32, blocks = 2 : i32,
    abid = 0 : i32, cbsz = 0 : i32, negateA} blgp = none : f32, f32, vector<32xf32>
  func.return %d : vector<32xf32>
}

// -----

// Both sources are vector<16xf16> while the destination is vector<8xi32>.
func.func @wmma(%arg0 : vector<16xf16>, %arg1 : vector<8xi32>) -> vector<8xi32> {
  // expected-error@+1 {{'amdgpu.wmma' op Expected int sources with int destination}}
  %0 = amdgpu.wmma %arg0 * %arg0 + %arg1 : vector<16xf16>, vector<16xf16>, vector<8xi32>
  func.return %0 : vector<8xi32>
}