xref: /llvm-project/mlir/test/Dialect/GPU/async-region.mlir (revision 44027f390854f3088bf85c5c4d8cf1405dd4bb4c)
1// RUN: mlir-opt -gpu-async-region %s | FileCheck %s
2
3// CHECK: module attributes {gpu.container_module}
4module attributes {gpu.container_module} {
5
6  gpu.module @kernels {
    // Empty kernel; every gpu.launch_func in the tests below targets
    // @kernels::@kernel.
7    gpu.func @kernel() kernel { gpu.return }
8  }
9
10  func.func private @foo() -> ()  // Body-less private declaration; tests below call it at points where a gpu.wait is expected before the call.
11
12  // CHECK-LABEL:func @async(%{{.*}}: index)
13  func.func @async(%sz : index) {
    // Input: two synchronous launches, an alloc and a dealloc, then a call.
    // Expected output: an initial `gpu.wait async` creates the first token,
    // each GPU op becomes `async` and depends on the token of the previous
    // one, and a non-async `gpu.wait` on the last token precedes `call @foo`.
14    // CHECK: %[[t0:.*]] = gpu.wait async
15    // CHECK: %[[t1:.*]] = gpu.launch_func async [%[[t0]]]
16    gpu.launch_func @kernels::@kernel
17        blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
18    // CHECK: %[[t2:.*]] = gpu.launch_func async [%[[t1]]]
19    gpu.launch_func @kernels::@kernel
20        blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
21    // CHECK: %[[m:.*]], %[[t3:.*]] = gpu.alloc async [%[[t2]]] ()
22    %0 = gpu.alloc() : memref<7xf32>
23    // CHECK: %[[t4:.*]] = gpu.dealloc async [%[[t3]]] %[[m]]
24    gpu.dealloc %0 : memref<7xf32>
25    // CHECK: gpu.wait [%[[t4]]]
26    // CHECK: call @foo
27    call @foo() : () -> ()
28    return
29  }
30
31  // CHECK-LABEL:func @defer_wait(%{{.*}}: index)
32  func.func @defer_wait(%sz : index) {
    // Two chained async.execute regions, each with one launch.
    // Expected output: neither region contains a gpu.wait; each yields its
    // launch token, so async.execute gains an extra result (%f0 / %f1). The
    // second async.execute line must mention %f0. The only gpu.wait is
    // expected after the final async.await, on the token awaited from %f1.
33    // CHECK: %[[a0:.*]], %[[f0:.*]] = async.execute
34    %a0 = async.execute {
35      // CHECK: %[[t:.*]] = gpu.launch_func async
36      gpu.launch_func @kernels::@kernel
37          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
38      // CHECK-NOT: gpu.wait
39      // CHECK: async.yield %[[t]]
40      async.yield
41    }
42
43    // CHECK: %[[a1:.*]], %[[f1:.*]] = async.execute
44    // CHECK-SAME: %[[f0]]
45    %a1 = async.execute [%a0] {
46      // CHECK: %[[t:.*]] = gpu.launch_func async
47      gpu.launch_func @kernels::@kernel
48          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
49      // CHECK-NOT: gpu.wait
50      // CHECK: async.yield %[[t]]
51      async.yield
52    }
53
54    // CHECK: async.await %[[a1]]
55    // CHECK: %[[t:.*]] = async.await %[[f1]]
56    // CHECK: gpu.wait [%[[t]]]
57    async.await %a1 : !async.token
58    return
59  }
60
61  // CHECK-LABEL:func @defer_wait_blocked_by_side_effect(%{{.*}}: index)
62  func.func @defer_wait_blocked_by_side_effect(%sz : index) {
    // The region calls @foo after the launch. Expected output: a gpu.wait on
    // the launch token is placed inside the region (matched between the launch
    // and the rest of the region), async.execute keeps a single result, and no
    // gpu.wait follows the async.await.
63    // CHECK: %[[a:.*]] = async.execute
64    %a = async.execute {
65      // CHECK: %[[t:.*]] = gpu.launch_func async
66      gpu.launch_func @kernels::@kernel
67          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
68      // CHECK: gpu.wait [%[[t]]]
69      func.call @foo() : () -> ()
70      async.yield
71    }
72
73    // CHECK: async.await %[[a]]
74    // CHECK-NOT: gpu.wait
75    async.await %a : !async.token
76    return
77  }
78
79  // CHECK-LABEL:func @defer_wait_pass_through(%{{.*}}: index)
80  func.func @defer_wait_pass_through(%sz : index) {
    // The second async.execute contains no GPU op of its own. Expected output:
    // it still yields some value (matched loosely with a wildcard) without a
    // gpu.wait, its async.execute line mentions %f0, and the only gpu.wait is
    // after the final async.await, on the token awaited from %f1.
81    // CHECK: %[[a0:.*]], %[[f0:.*]] = async.execute
82    %a0 = async.execute {
83      // CHECK: %[[t:.*]] = gpu.launch_func async
84      gpu.launch_func @kernels::@kernel
85          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
86      // CHECK-NOT: gpu.wait
87      // CHECK: async.yield %[[t]]
88      async.yield
89    }
90
91    // CHECK: %[[a1:.*]], %[[f1:.*]] = async.execute
92    // CHECK-SAME: %[[f0]]
93    %a1 = async.execute [%a0] {
94      // CHECK-NOT: gpu.wait
95      // CHECK: async.yield %{{.*}}
96      async.yield
97    }
98
99    // CHECK: async.await %[[a1]]
100    // CHECK: %[[t:.*]] = async.await %[[f1]]
101    // CHECK: gpu.wait [%[[t]]]
102    async.await %a1 : !async.token
103    return
104  }
105
106  // CHECK-LABEL:func @async_execute_with_result(%{{.*}}: index)
107  func.func @async_execute_with_result(%sz : index) -> index {
    // The async.execute already returns an !async.value<index>. Expected
    // output: a second result of type !async.value<!gpu.async.token> is
    // appended, the region yields the launch token after the index, the
    // gpu.wait follows the await of result #1, and the original value is read
    // through result #0.
108    // CHECK: %[[a0:.*]], %[[f0:.*]]:2 = async.execute
109    // CHECK-SAME: -> (!async.value<index>, !async.value<!gpu.async.token>)
110    %a0, %f0 = async.execute -> !async.value<index> {
111      // CHECK: %[[t:.*]] = gpu.launch_func async
112      gpu.launch_func @kernels::@kernel
113          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
114      // CHECK-NOT: gpu.wait
115      // CHECK: async.yield {{.*}}, %[[t]] : index, !gpu.async.token
116      async.yield %sz : index
117    }
118
119    // CHECK: async.await %[[a0]] : !async.token
120    // CHECK: %[[t:.*]] = async.await %[[f0]]#1 : !async.value<!gpu.async.token>
121    // CHECK: gpu.wait [%[[t]]]
122    async.await %a0 : !async.token
123    // CHECK: %[[x:.*]] = async.await %[[f0]]#0 : !async.value<index>
124    %x = async.await %f0 : !async.value<index>
125    // CHECK: return %[[x]] : index
126    return %x : index
127  }
128
129  // CHECK-LABEL:func @async_execute_no_use(%{{.*}}: index)
130  func.func @async_execute_no_use(%sz : index) {
    // %a0 is never used after the async.execute. Expected output: the
    // gpu.wait on the launch token stays inside the region.
131    // CHECK: async.execute {
132    %a0 = async.execute {
133      // CHECK: %[[t:.*]] = gpu.launch_func async
134      gpu.launch_func @kernels::@kernel
135          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
136      // CHECK: gpu.wait [%[[t]]]
137      async.yield
138    }
139    return
140  }
141
142  // CHECK-LABEL:func @async_execute_fork(%{{.*}}: index)
143  func.func @async_execute_fork(%sz : index) {
    // %a0 is a dependency of two later async.execute ops. Expected output:
    // the first region yields its launch token twice, giving two
    // !async.value<!gpu.async.token> results; the two dependents take #0 and
    // #1 respectively as operands, and each keeps a gpu.wait on its own launch
    // token inside its region.
144    // CHECK: %[[a0:.*]], %[[f0:.*]]:2 = async.execute
145    // CHECK-SAME: -> (!async.value<!gpu.async.token>, !async.value<!gpu.async.token>)
146    %a0 = async.execute {
147      // CHECK: %[[t:.*]] = gpu.launch_func async
148      gpu.launch_func @kernels::@kernel
149          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
150      // CHECK-NOT: gpu.wait
151      // CHECK: async.yield %[[t]], %[[t]] : !gpu.async.token, !gpu.async.token
152      async.yield
153    }
154    // CHECK: async.execute [%[[a0]]] (%[[f0]]#0 as {{.*}}: !async.value<!gpu.async.token>)
155    %a1 = async.execute [%a0] {
156      // CHECK: %[[t:.*]] = gpu.launch_func async
157      gpu.launch_func @kernels::@kernel
158          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
159      // CHECK: gpu.wait [%[[t]]]
160      async.yield
161    }
162    // CHECK: async.execute [%[[a0]]] (%[[f0]]#1 as {{.*}}: !async.value<!gpu.async.token>)
163    %a2 = async.execute [%a0] {
164      // CHECK: %[[t:.*]] = gpu.launch_func async
165      gpu.launch_func @kernels::@kernel
166          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
167      // CHECK: gpu.wait [%[[t]]]
168      async.yield
169    }
170    return
171  }
172
173  // CHECK-LABEL:func @existing_tokens()
174  func.func @existing_tokens() {
    // Input already mixes async and synchronous GPU ops with explicit tokens.
    // Expected output: the current token is added in front of each op's
    // existing dependency list, without de-duplication. The second wait
    // therefore lists %t0 twice (its own operand plus the current token), and
    // the alloc lists [%t1, %t0]. The first synchronous gpu.wait consumes the
    // dealloc token; the last gpu.wait stays without async or dependencies.
175    // CHECK: %[[t0:.*]] = gpu.wait async
176    // CHECK-NOT: [{{.*}}]
177    %t0 = gpu.wait async
178    // CHECK: %[[t1:.*]] = gpu.wait async [%[[t0]], %[[t0]]]
179    %t1 = gpu.wait async [%t0]
180    // CHECK: %[[m:.*]], %[[t2:.*]] = gpu.alloc async [%[[t1]], %[[t0]]] ()
181    %0 = gpu.alloc [%t0] () : memref<7xf32>
182    // CHECK: %[[t3:.*]] = gpu.dealloc async [%[[t2]]] %[[m]]
183    %t2 = gpu.dealloc async %0 : memref<7xf32>
184    // CHECK: gpu.wait [%[[t3]]]
185    gpu.wait
186    // CHECK: gpu.wait
187    // CHECK-NOT: async
188    // CHECK-NOT: [{{.*}}]
189    gpu.wait
190    return
191  }
192}
193