// RUN: mlir-opt -allow-unregistered-dialect %s | FileCheck %s
// Verify the printed output can be parsed.
// RUN: mlir-opt -allow-unregistered-dialect %s | mlir-opt -allow-unregistered-dialect | FileCheck %s
// Verify the generic form can be parsed.
// RUN: mlir-opt -allow-unregistered-dialect -mlir-print-op-generic %s | mlir-opt -allow-unregistered-dialect | FileCheck %s

// CHECK-LABEL: func @ext_packed_fp8
// CHECK: amdgpu.ext_packed_fp8
func.func @ext_packed_fp8(%v: vector<4xf8E4M3FNUZ>) -> f32 {
  %ret = amdgpu.ext_packed_fp8 %v[0] : vector<4xf8E4M3FNUZ> to f32
  func.return %ret : f32
}

// CHECK-LABEL: func @packed_trunc_2xfp8
// CHECK: amdgpu.packed_trunc_2xfp8
func.func @packed_trunc_2xfp8(%v1: f32, %v2: f32, %others: vector<4xf8E5M2FNUZ>, %stoch: i32) -> vector<4xf8E5M2FNUZ> {
  %ret = amdgpu.packed_trunc_2xfp8 %v1, %v2 into %others[word 1] : f32 to vector<4xf8E5M2FNUZ> into vector<4xf8E5M2FNUZ>
  func.return %ret : vector<4xf8E5M2FNUZ>
}

// CHECK-LABEL: func @packed_stoch_round_fp8
// CHECK: amdgpu.packed_stoch_round_fp8
func.func @packed_stoch_round_fp8(%v1: f32, %stoch: i32, %others: vector<4xf8E5M2FNUZ>) -> vector<4xf8E5M2FNUZ> {
  %ret = amdgpu.packed_stoch_round_fp8 %v1 + %stoch into %others[2] : f32 to vector<4xf8E5M2FNUZ> into vector<4xf8E5M2FNUZ>
  func.return %ret : vector<4xf8E5M2FNUZ>
}

// CHECK-LABEL: func @raw_buffer_load_f32_from_rank_1
func.func @raw_buffer_load_f32_from_rank_1(%src : memref<128xf32>, %offset : i32, %idx0 : i32) -> f32 {
  // CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[{{.*}}] sgprOffset %{{.*}} : memref<128xf32>, i32 -> f32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true, indexOffset = 1 : i32} %src[%idx0] sgprOffset %offset : memref<128xf32>, i32 -> f32
  func.return %0 : f32
}

// CHECK-LABEL: func @raw_buffer_load_f32_from_rank_4
func.func @raw_buffer_load_f32_from_rank_4(%src : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) -> f32 {
  // CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> f32
  %0 = amdgpu.raw_buffer_load {boundsCheck = true, indexOffset = 1 : i32} %src[%idx0, %idx1, %idx2, %idx3] sgprOffset %offset : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> f32
  func.return %0 : f32
}

// CHECK-LABEL: func @raw_buffer_load_scalar
func.func @raw_buffer_load_scalar(%src : memref<f32>) -> f32 {
  // CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[] : memref<f32> -> f32
  %0 = amdgpu.raw_buffer_load {indexOffset = 1 : i32} %src[] : memref<f32> -> f32
  func.return %0 : f32
}

// CHECK-LABEL: func @raw_buffer_load_4xf32_from_rank_4
func.func @raw_buffer_load_4xf32_from_rank_4(%src : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) -> vector<4xf32> {
  // CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> vector<4xf32>
  %0 = amdgpu.raw_buffer_load {boundsCheck = true, indexOffset = 1 : i32} %src[%idx0, %idx1, %idx2, %idx3] sgprOffset %offset : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> vector<4xf32>
  func.return %0 : vector<4xf32>
}

// CHECK-LABEL: func @raw_buffer_store_f32_to_rank_1
func.func @raw_buffer_store_f32_to_rank_1(%value : f32, %dst : memref<128xf32>, %offset : i32, %idx0 : i32) {
  // CHECK: amdgpu.raw_buffer_store {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[{{.*}}] sgprOffset %{{.*}} : f32 -> memref<128xf32>, i32
  amdgpu.raw_buffer_store {boundsCheck = true, indexOffset = 1 : i32} %value -> %dst[%idx0] sgprOffset %offset : f32 -> memref<128xf32>, i32
  func.return
}

// CHECK-LABEL: func @raw_buffer_store_f32_to_rank_4
func.func @raw_buffer_store_f32_to_rank_4(%value : f32, %dst : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) {
  // CHECK: amdgpu.raw_buffer_store {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : f32 -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  amdgpu.raw_buffer_store {boundsCheck = true, indexOffset = 1 : i32} %value -> %dst[%idx0, %idx1, %idx2, %idx3] sgprOffset %offset : f32 -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  func.return
}

// CHECK-LABEL: func @raw_buffer_store_4xf32_to_rank_4
func.func @raw_buffer_store_4xf32_to_rank_4(%value : vector<4xf32>, %dst : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) {
  // CHECK: amdgpu.raw_buffer_store {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : vector<4xf32> -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  amdgpu.raw_buffer_store {boundsCheck = true, indexOffset = 1 : i32} %value -> %dst[%idx0, %idx1, %idx2, %idx3] sgprOffset %offset : vector<4xf32> -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  func.return
}

// CHECK-LABEL: func @raw_buffer_store_scalar
func.func @raw_buffer_store_scalar(%value : f32, %dst : memref<f32>) {
  // CHECK: amdgpu.raw_buffer_store {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[] : f32 -> memref<f32>
  amdgpu.raw_buffer_store {indexOffset = 1 : i32} %value -> %dst[] : f32 -> memref<f32>
  func.return
}

// CHECK-LABEL: func @raw_buffer_atomic_fadd_f32_to_rank_1
func.func @raw_buffer_atomic_fadd_f32_to_rank_1(%value : f32, %dst : memref<128xf32>, %offset : i32, %idx0 : i32) {
  // CHECK: amdgpu.raw_buffer_atomic_fadd {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[{{.*}}] sgprOffset %{{.*}} : f32 -> memref<128xf32>, i32
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true, indexOffset = 1 : i32} %value -> %dst[%idx0] sgprOffset %offset : f32 -> memref<128xf32>, i32
  func.return
}

// CHECK-LABEL: func @raw_buffer_atomic_fadd_f32_to_rank_4
func.func @raw_buffer_atomic_fadd_f32_to_rank_4(%value : f32, %dst : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) {
  // CHECK: amdgpu.raw_buffer_atomic_fadd {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : f32 -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true, indexOffset = 1 : i32} %value -> %dst[%idx0, %idx1, %idx2, %idx3] sgprOffset %offset : f32 -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  func.return
}

// CHECK-LABEL: func @raw_buffer_atomic_cmpswap_f32
func.func @raw_buffer_atomic_cmpswap_f32(%src : f32, %cmp : f32, %dst : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) {
  // CHECK: amdgpu.raw_buffer_atomic_cmpswap {indexOffset = 1 : i32} %{{.*}}, %{{.*}} -> %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : f32 -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  amdgpu.raw_buffer_atomic_cmpswap {boundsCheck = true, indexOffset = 1 : i32} %src, %cmp -> %dst[%idx0, %idx1, %idx2, %idx3] sgprOffset %offset : f32 -> memref<128x64x32x16xf32>, i32, i32, i32, i32
  func.return
}

// CHECK-LABEL: func @lds_barrier
func.func @lds_barrier() {
  // CHECK: amdgpu.lds_barrier
  amdgpu.lds_barrier
  func.return
}

// CHECK-LABEL: func @sched_barrier
func.func @sched_barrier() {
  // CHECK: amdgpu.sched_barrier allow = <none>
  amdgpu.sched_barrier allow = <none>
  // CHECK: amdgpu.sched_barrier allow = <valu|all_vmem>
  amdgpu.sched_barrier allow = <valu|all_vmem>
  func.return
}

// CHECK-LABEL: func @mfma
func.func @mfma(%arg0 : f32, %arg1 : vector<32xf32>) -> vector<32xf32> {
  // CHECK: amdgpu.mfma
  %0 = amdgpu.mfma %arg0 * %arg0 + %arg1 { abid = 1 : i32, cbsz = 1 : i32, k = 1 : i32, m = 32 : i32, n = 32 : i32, blocks = 2 : i32 } blgp = bcast_second_32 : f32, f32, vector<32xf32>
  func.return %0 : vector<32xf32>
}

// CHECK-LABEL: func @wmma
func.func @wmma(%arg0 : vector<16xf16>, %arg1 : vector<8xf16>) -> vector<8xf16> {
  // CHECK: amdgpu.wmma
  %0 = amdgpu.wmma %arg0 * %arg0 + %arg1 : vector<16xf16>, vector<16xf16>, vector<8xf16>
  func.return %0 : vector<8xf16>
}