xref: /llvm-project/mlir/test/Dialect/NVGPU/roundtrip.mlir (revision 708185f03ff480b3481132802b7b63461564f0ab)
1// RUN: mlir-opt %s | mlir-opt | FileCheck %s
2
3// CHECK-LABEL: func @ldmatrix(
4func.func @ldmatrix(%arg0: memref<?x?xf16, 3>, %x: index, %y: index) {
5//      CHECK: nvgpu.ldmatrix %{{.*}}[%{{.*}}, %{{.*}}]
6// CHECK-SAME: {numTiles = 4 : i32, transpose = false} : memref<?x?xf16, 3> -> vector<4x2xf16>
7  %l = nvgpu.ldmatrix %arg0[%x, %y] {numTiles = 4 : i32, transpose = false} :
8    memref<?x?xf16, 3> -> vector<4x2xf16>
9  return
10}
11
12// CHECK-LABEL: func @mma_sync(
13func.func @mma_sync(%arg0: vector<4x2xf16>,
14               %arg1: vector<2x2xf16>,
15               %arg2: vector<2x2xf16>) -> vector<2x2xf16> {
16//       CHECK: nvgpu.mma.sync(%{{.*}}, %{{.*}}, %{{.*}}) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
17  %d = nvgpu.mma.sync(%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} :
18    (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
19  return %d : vector<2x2xf16>
20}
21
22// CHECK-LABEL: func @mma_sp_sync_f16_16832(
23func.func @mma_sp_sync_f16_16832(%arg0: vector<4x2xf16>,
24                                 %arg1: vector<4x2xf16>,
25                                 %arg2: vector<2x2xf16>,
26                                 %arg3: vector<2xi16>) -> vector<2x2xf16> {
27  //      CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) {
28  // CHECK-SAME:   mmaShape = [16, 8, 32]
29  // CHECK-SAME: (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
30  %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 32]} :
31    (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
32  return %d : vector<2x2xf16>
33}
34
35// CHECK-LABEL: func @mma_sp_sync_f16_16816(
36func.func @mma_sp_sync_f16_16816(%arg0: vector<2x2xf16>,
37                                 %arg1: vector<2x2xf16>,
38                                 %arg2: vector<2x2xf16>,
39                                 %arg3: vector<2xi16>) -> vector<2x2xf16> {
40  //      CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) {
41  // CHECK-SAME:   mmaShape = [16, 8, 16]
42  // CHECK-SAME: (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
43  %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 16]} :
44    (vector<2x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>
45  return %d : vector<2x2xf16>
46}
47
48// CHECK-LABEL: func @mma_sp_sync_i8_16864(
49func.func @mma_sp_sync_i8_16864(%arg0: vector<4x4xi8>,
50                                %arg1: vector<4x4xi8>,
51                                %arg2: vector<2x2xi32>,
52                                %arg3: vector<2xi16>) -> vector<2x2xi32> {
53  //      CHECK: nvgpu.mma.sp.sync(%{{.*}}, %{{.*}}, %{{.*}}) metadata(%{{.+}}) {
54  // CHECK-SAME:   mmaShape = [16, 8, 64]
55  // CHECK-SAME: (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32>
56  %d = nvgpu.mma.sp.sync(%arg0, %arg1, %arg2) metadata(%arg3) {mmaShape = [16, 8, 64]} :
57    (vector<4x4xi8>, vector<4x4xi8>, vector<2x2xi32>) -> vector<2x2xi32>
58  return %d : vector<2x2xi32>
59}
60
61func.func @async_cp(%dst : memref<2x7x5xf32, 3>, %src : memref<4x5xf32>){
62  // CHECK-LABEL: func @async_cp
63  %c0 = arith.constant 0 : index
64  // CHECK: nvgpu.device_async_copy %{{.*}}[{{.*}}, {{.*}}], %{{.*}}[{{.*}}, {{.*}}, {{.*}}], 4 : memref<4x5xf32> to memref<2x7x5xf32, 3>
65  %0 = nvgpu.device_async_copy %src[%c0, %c0], %dst[%c0, %c0, %c0], 4 : memref<4x5xf32> to memref<2x7x5xf32, 3>
66  // CHECK: %{{.*}} = nvgpu.device_async_create_group
67  %token = nvgpu.device_async_create_group %0
68  // CHECK: nvgpu.device_async_wait %{{.*}} {numGroups = 1 : i32}
69  nvgpu.device_async_wait %token {numGroups = 1 : i32}
70  return
71}
72