// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(gpu.module(gpu.func(test-gpu-memory-promotion)))' -split-input-file %s | FileCheck %s
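
// The test-gpu-memory-promotion pass promotes every gpu.func argument that
// carries the gpu.test_promote_workgroup attribute to a workgroup memory
// attribution: it emits a loop nest copying the argument into the attribution,
// rewrites all uses to point at the attribution, and emits a second loop nest
// copying the data back into the argument before the function returns. The
// checks below verify each of these steps.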

gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref3d
  // CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, #gpu.address_space<workgroup>>)
  gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted; the order does not matter.
    // CHECK-DAG: %[[c1:.*]] = arith.constant 1
    // CHECK-DAG: %[[c4:.*]] = arith.constant 4
    // CHECK-DAG: %[[c5:.*]] = arith.constant 5
    // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x
    // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y
    // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z
    // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x
    // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y
    // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z

    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // which is tested separately; a sketch of the mapped form follows below.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =
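    // The bounds themselves are not checked; as a rough sketch, given a loop
    //   scf.for %i = %lb to %ub step %step
    // mapLoopToProcessorIds distributes it over one thread dimension roughly as
    //   scf.for %i = (%lb + %tid * %step) to %ub step (%step * %block_dim)
    // where %tid and %block_dim are the gpu.thread_id and gpu.block_dim values
    // captured above.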

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK:       %[[v:.*]] = memref.load %[[arg]][%[[i1]], %[[i2]]]
    // CHECK:       store %[[v]], %[[promoted]][%[[i1]], %[[i2]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, #gpu.address_space<workgroup>>)
    "use"(%arg0) : (memref<5x4xf32>) -> ()

    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // which is tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK:       %[[v:.*]] = memref.load %[[promoted]][%[[i1]], %[[i2]]]
    // CHECK:       store %[[v]], %[[arg]][%[[i1]], %[[i2]]]
    gpu.return
  }
}

// -----

gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref5d
  // CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, #gpu.address_space<workgroup>>)
  gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted; the order does not matter.
    // CHECK-DAG: %[[c0:.*]] = arith.constant 0
    // CHECK-DAG: %[[c1:.*]] = arith.constant 1
    // CHECK-DAG: %[[c4:.*]] = arith.constant 4
    // CHECK-DAG: %[[c5:.*]] = arith.constant 5
    // CHECK-DAG: %[[c6:.*]] = arith.constant 6
    // CHECK-DAG: %[[c7:.*]] = arith.constant 7
    // CHECK-DAG: %[[c8:.*]] = arith.constant 8
    // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x
    // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y
    // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z
    // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x
    // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y
    // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z

    // Verify that loops for the copy are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =
    // CHECK:       scf.for %[[i3:.*]] =
    // CHECK:         scf.for %[[i4:.*]] =
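    // With a rank-5 memref the copy nest has one loop per dimension, but only
    // three thread dimensions are available, so not every loop can get its own
    // gpu.thread_id; the actual distribution is left to mapLoopToProcessorIds
    // and only the nest depth is checked here.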

    // Verify that the copy is emitted.
    // CHECK:           %[[v:.*]] = memref.load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK:           store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, #gpu.address_space<workgroup>>)
    "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()

    // Verify that loops for the copy are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =
    // CHECK:       scf.for %[[i3:.*]] =
    // CHECK:         scf.for %[[i4:.*]] =

    // Verify that the copy is emitted.
    // CHECK:           %[[v:.*]] = memref.load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK:           store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    gpu.return
  }
}

// -----

gpu.module @foo {

  // Check that the new workgroup attribution is appended after the existing
  // one and that the existing private attribution is kept.
  // CHECK-LABEL: @insert
  // CHECK-SAME: (%{{.*}}: memref<4xf32>
  // CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, #gpu.address_space<workgroup>>
  // CHECK-SAME: %[[wg2:.*]] : memref<4xf32, #gpu.address_space<workgroup>>)
  // CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
  gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
      workgroup(%arg1: memref<1x1xf64, #gpu.address_space<workgroup>>)
      private(%arg2: memref<1x1xi64, 5>)
      kernel {
    // CHECK: "use"(%[[wg2]])
    "use"(%arg0) : (memref<4xf32>) -> ()
    gpu.return
  }
}