// RUN: mlir-opt -gpu-async-region %s | FileCheck %s

// Exercises the -gpu-async-region pass: each function below holds input IR,
// and the interleaved FileCheck directives describe the expected output.

// CHECK: module attributes {gpu.container_module}
module attributes {gpu.container_module} {

  // Empty kernel used as the launch target by the test functions.
  gpu.module @kernels {
    gpu.func @kernel() kernel { gpu.return }
  }

  // External function with no body; it is called from two of the tests.
  func.func private @foo() -> ()

  // Plain launch/alloc/dealloc ops are expected to become async ops chained
  // through their tokens, starting from a `gpu.wait async` and ending in a
  // `gpu.wait` on the last token ahead of the call to @foo.
  // CHECK-LABEL:func @async(%{{.*}}: index)
  func.func @async(%sz : index) {
    // CHECK: %[[t0:.*]] = gpu.wait async
    // CHECK: %[[t1:.*]] = gpu.launch_func async [%[[t0]]]
    gpu.launch_func @kernels::@kernel
        blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    // CHECK: %[[t2:.*]] = gpu.launch_func async [%[[t1]]]
    gpu.launch_func @kernels::@kernel
        blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
    // CHECK: %[[m:.*]], %[[t3:.*]] = gpu.alloc async [%[[t2]]] ()
    %0 = gpu.alloc() : memref<7xf32>
    // CHECK: %[[t4:.*]] = gpu.dealloc async [%[[t3]]] %[[m]]
    gpu.dealloc %0 : memref<7xf32>
    // CHECK: gpu.wait [%[[t4]]]
    // CHECK: call @foo
    call @foo() : () -> ()
    return
  }

  // Two chained async.execute regions, each with one launch. No gpu.wait is
  // expected inside either region: the launch token is yielded instead, the
  // first region's extra result is passed to the second, and the gpu.wait is
  // expected only after the final async.await.
  // CHECK-LABEL:func @defer_wait(%{{.*}}: index)
  func.func @defer_wait(%sz : index) {
    // CHECK: %[[a0:.*]], %[[f0:.*]] = async.execute
    %a0 = async.execute {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK-NOT: gpu.wait
      // CHECK: async.yield %[[t]]
      async.yield
    }

    // CHECK: %[[a1:.*]], %[[f1:.*]] = async.execute
    // CHECK-SAME: %[[f0]]
    %a1 = async.execute [%a0] {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK-NOT: gpu.wait
      // CHECK: async.yield %[[t]]
      async.yield
    }

    // CHECK: async.await %[[a1]]
    // CHECK: %[[t:.*]] = async.await %[[f1]]
    // CHECK: gpu.wait [%[[t]]]
    async.await %a1 : !async.token
    return
  }

  // The region calls @foo after the launch. Here a gpu.wait on the launch
  // token is expected ahead of the async.await, the async.execute keeps a
  // single result, and no gpu.wait is expected after the await.
  // CHECK-LABEL:func @defer_wait_blocked_by_side_effect(%{{.*}}: index)
  func.func @defer_wait_blocked_by_side_effect(%sz : index) {
    // CHECK: %[[a:.*]] = async.execute
    %a = async.execute {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK: gpu.wait [%[[t]]]
      func.call @foo() : () -> ()
      async.yield
    }

    // CHECK: async.await %[[a]]
    // CHECK-NOT: gpu.wait
    async.await %a : !async.token
    return
  }

  // The second async.execute contains no GPU ops of its own. It is still
  // expected to take the first region's extra result as an operand and to
  // yield a value, so that the gpu.wait lands after the final async.await.
  // CHECK-LABEL:func @defer_wait_pass_through(%{{.*}}: index)
  func.func @defer_wait_pass_through(%sz : index) {
    // CHECK: %[[a0:.*]], %[[f0:.*]] = async.execute
    %a0 = async.execute {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK-NOT: gpu.wait
      // CHECK: async.yield %[[t]]
      async.yield
    }

    // CHECK: %[[a1:.*]], %[[f1:.*]] = async.execute
    // CHECK-SAME: %[[f0]]
    %a1 = async.execute [%a0] {
      // CHECK-NOT: gpu.wait
      // CHECK: async.yield %{{.*}}
      async.yield
    }

    // CHECK: async.await %[[a1]]
    // CHECK: %[[t:.*]] = async.await %[[f1]]
    // CHECK: gpu.wait [%[[t]]]
    async.await %a1 : !async.token
    return
  }

  // The async.execute already returns an !async.value<index>. A second
  // !async.value<!gpu.async.token> result is expected to be appended; result
  // #1 is awaited and waited on, while result #0 still feeds the return.
  // CHECK-LABEL:func @async_execute_with_result(%{{.*}}: index)
  func.func @async_execute_with_result(%sz : index) -> index {
    // CHECK: %[[a0:.*]], %[[f0:.*]]:2 = async.execute
    // CHECK-SAME: -> (!async.value<index>, !async.value<!gpu.async.token>)
    %a0, %f0 = async.execute -> !async.value<index> {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK-NOT: gpu.wait
      // CHECK: async.yield {{.*}}, %[[t]] : index, !gpu.async.token
      async.yield %sz : index
    }

    // CHECK: async.await %[[a0]] : !async.token
    // CHECK: %[[t:.*]] = async.await %[[f0]]#1 : !async.value<!gpu.async.token>
    // CHECK: gpu.wait [%[[t]]]
    async.await %a0 : !async.token
    // CHECK: %[[x:.*]] = async.await %[[f0]]#0 : !async.value<index>
    %x = async.await %f0 : !async.value<index>
    // CHECK: return %[[x]] : index
    return %x : index
  }

  // The token produced by the async.execute has no users. The gpu.wait on the
  // launch token is expected to stay inside the region.
  // CHECK-LABEL:func @async_execute_no_use(%{{.*}}: index)
  func.func @async_execute_no_use(%sz : index) {
    // CHECK: async.execute {
    %a0 = async.execute {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK: gpu.wait [%[[t]]]
      async.yield
    }
    return
  }

  // Two async.execute ops depend on %a0. The first region is expected to
  // yield its launch token twice, each dependent region to receive a
  // different one of the two results (#0 and #1), and each dependent region
  // to keep a gpu.wait on its own launch token.
  // CHECK-LABEL:func @async_execute_fork(%{{.*}}: index)
  func.func @async_execute_fork(%sz : index) {
    // CHECK: %[[a0:.*]], %[[f0:.*]]:2 = async.execute
    // CHECK-SAME: -> (!async.value<!gpu.async.token>, !async.value<!gpu.async.token>)
    %a0 = async.execute {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK-NOT: gpu.wait
      // CHECK: async.yield %[[t]], %[[t]] : !gpu.async.token, !gpu.async.token
      async.yield
    }
    // CHECK: async.execute [%[[a0]]] (%[[f0]]#0 as {{.*}}: !async.value<!gpu.async.token>)
    %a1 = async.execute [%a0] {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK: gpu.wait [%[[t]]]
      async.yield
    }
    // CHECK: async.execute [%[[a0]]] (%[[f0]]#1 as {{.*}}: !async.value<!gpu.async.token>)
    %a2 = async.execute [%a0] {
      // CHECK: %[[t:.*]] = gpu.launch_func async
      gpu.launch_func @kernels::@kernel
          blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
      // CHECK: gpu.wait [%[[t]]]
      async.yield
    }
    return
  }

  // The input already mixes async and non-async GPU ops with explicit tokens.
  // Expected output: the leading `gpu.wait async` ops are unchanged, the
  // alloc becomes async and gains %t1 as a dependency in front of %t0, the
  // dealloc depends on the alloc token, the first plain gpu.wait waits on the
  // dealloc token, and the last gpu.wait stays without `async` or operands.
  // CHECK-LABEL:func @existing_tokens()
  func.func @existing_tokens() {
    // CHECK: %[[t0:.*]] = gpu.wait async
    // CHECK-NOT: [{{.*}}]
    %t0 = gpu.wait async
    // CHECK: %[[t1:.*]] = gpu.wait async [%[[t0]]]
    %t1 = gpu.wait async [%t0]
    // CHECK: %[[m:.*]], %[[t2:.*]] = gpu.alloc async [%[[t1]], %[[t0]]] ()
    %0 = gpu.alloc [%t0] () : memref<7xf32>
    // CHECK: %[[t3:.*]] = gpu.dealloc async [%[[t2]]] %[[m]]
    %t2 = gpu.dealloc async %0 : memref<7xf32>
    // CHECK: gpu.wait [%[[t3]]]
    gpu.wait
    // CHECK: gpu.wait
    // CHECK-NOT: async
    // CHECK-NOT: [{{.*}}]
    gpu.wait
    return
  }
}