// RUN: mlir-opt -int-range-optimizations -split-input-file %s | FileCheck %s

// Integer range inference test for GPU dialect index/size operations.
//
// Each case feeds a GPU value into `test.reflect_bounds`, and the FileCheck
// expectations pin the smin/smax/umin/umax attributes attached to that op.
// Cases are separated by split-input-file markers.

// Case 1: region arguments of `gpu.launch`.
// Sizes bound to `test.with_bounds` values are expected to inherit those
// bounds; sizes bound to the unannotated %arg0 are expected to fall back to
// [1, 4294967295]. Each id is expected to range over [0, max(size) - 1].
// CHECK-LABEL: func @launch_func
func.func @launch_func(%arg0 : index) {
  %0 = test.with_bounds {
    umin = 3 : index, umax = 5 : index,
    smin = 3 : index, smax = 5 : index
  } : index
  %1 = test.with_bounds {
    umin = 7 : index, umax = 11 : index,
    smin = 7 : index, smax = 11 : index
  } : index
  gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %0, %grid_dim_y = %1, %grid_dim_z = %arg0)
      threads(%thread_id_x, %thread_id_y, %thread_id_z) in (%block_dim_x = %arg0, %block_dim_y = %0, %block_dim_z = %1) {

    // Grid sizes: x from %0, y from %1, z from %arg0.
    // CHECK: test.reflect_bounds {smax = 5 : index, smin = 3 : index, umax = 5 : index, umin = 3 : index}
    // CHECK: test.reflect_bounds {smax = 11 : index, smin = 7 : index, umax = 11 : index, umin = 7 : index}
    // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
    %grid_dim_x0 = test.reflect_bounds %grid_dim_x : index
    %grid_dim_y0 = test.reflect_bounds %grid_dim_y : index
    %grid_dim_z0 = test.reflect_bounds %grid_dim_z : index

    // Block ids.
    // CHECK: test.reflect_bounds {smax = 4 : index, smin = 0 : index, umax = 4 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
    %block_id_x0 = test.reflect_bounds %block_id_x : index
    %block_id_y0 = test.reflect_bounds %block_id_y : index
    %block_id_z0 = test.reflect_bounds %block_id_z : index

    // Block sizes: x from %arg0, y from %0, z from %1.
    // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
    // CHECK: test.reflect_bounds {smax = 5 : index, smin = 3 : index, umax = 5 : index, umin = 3 : index}
    // CHECK: test.reflect_bounds {smax = 11 : index, smin = 7 : index, umax = 11 : index, umin = 7 : index}
    %block_dim_x0 = test.reflect_bounds %block_dim_x : index
    %block_dim_y0 = test.reflect_bounds %block_dim_y : index
    %block_dim_z0 = test.reflect_bounds %block_dim_z : index

    // Thread ids.
    // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 4 : index, smin = 0 : index, umax = 4 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 10 : index, smin = 0 : index, umax = 10 : index, umin = 0 : index}
    %thread_id_x0 = test.reflect_bounds %thread_id_x : index
    %thread_id_y0 = test.reflect_bounds %thread_id_y : index
    %thread_id_z0 = test.reflect_bounds %thread_id_z : index

    // The launch bounds are not constant, and so this can't infer anything
    // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
    %thread_id_op = gpu.thread_id y
    %thread_id_op0 = test.reflect_bounds %thread_id_op : index
    gpu.terminator
  }

  func.return
}

// -----

// Case 2: GPU index ops inside an `llvm.func` kernel that carries no size
// annotations. Every op is expected to get its default range.
// CHECK-LABEL: func @kernel
module attributes {gpu.container_module} {
  gpu.module @gpu_module {
    llvm.func @kernel() attributes {gpu.kernel} {

      %grid_dim_x = gpu.grid_dim x
      %grid_dim_y = gpu.grid_dim y
      %grid_dim_z = gpu.grid_dim z

      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      %grid_dim_x0 = test.reflect_bounds %grid_dim_x : index
      %grid_dim_y0 = test.reflect_bounds %grid_dim_y : index
      %grid_dim_z0 = test.reflect_bounds %grid_dim_z : index

      %block_id_x = gpu.block_id x
      %block_id_y = gpu.block_id y
      %block_id_z = gpu.block_id z

      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      %block_id_x0 = test.reflect_bounds %block_id_x : index
      %block_id_y0 = test.reflect_bounds %block_id_y : index
      %block_id_z0 = test.reflect_bounds %block_id_z : index

      %block_dim_x = gpu.block_dim x
      %block_dim_y = gpu.block_dim y
      %block_dim_z = gpu.block_dim z

      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      %block_dim_x0 = test.reflect_bounds %block_dim_x : index
      %block_dim_y0 = test.reflect_bounds %block_dim_y : index
      %block_dim_z0 = test.reflect_bounds %block_dim_z : index

      %thread_id_x = gpu.thread_id x
      %thread_id_y = gpu.thread_id y
      %thread_id_z = gpu.thread_id z

      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      %thread_id_x0 = test.reflect_bounds %thread_id_x : index
      %thread_id_y0 = test.reflect_bounds %thread_id_y : index
      %thread_id_z0 = test.reflect_bounds %thread_id_z : index

      %global_id_x = gpu.global_id x
      %global_id_y = gpu.global_id y
      %global_id_z = gpu.global_id z

      // The expected signed bounds are the full 64-bit signed range.
      // NOTE(review): umax prints as a negative number here, presumably the
      // unsigned bound rendered as a signed integer - confirm.
      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = -9223372036854775808 : index, umax = -8589934592 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = -9223372036854775808 : index, umax = -8589934592 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 9223372036854775807 : index, smin = -9223372036854775808 : index, umax = -8589934592 : index, umin = 0 : index}
      %global_id_x0 = test.reflect_bounds %global_id_x : index
      %global_id_y0 = test.reflect_bounds %global_id_y : index
      %global_id_z0 = test.reflect_bounds %global_id_z : index

      %subgroup_size = gpu.subgroup_size : index
      %lane_id = gpu.lane_id
      %num_subgroups = gpu.num_subgroups : index
      %subgroup_id = gpu.subgroup_id : index

      // Expectations below follow the reflect order: subgroup_size, lane_id,
      // num_subgroups, subgroup_id.
      // CHECK: test.reflect_bounds {smax = 128 : index, smin = 1 : index, umax = 128 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 127 : index, smin = 0 : index, umax = 127 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      %subgroup_size0 = test.reflect_bounds %subgroup_size : index
      %lane_id0 = test.reflect_bounds %lane_id : index
      %num_subgroups0 = test.reflect_bounds %num_subgroups : index
      %subgroup_id0 = test.reflect_bounds %subgroup_id : index

      llvm.return
    }
  }
}

// -----

// Case 3: `gpu.func` kernel annotated with `known_block_size` (8, 12, 16) and
// `known_grid_size` (20, 24, 28). Sizes are expected to fold to exact values
// and ids to [0, size - 1].
// CHECK-LABEL: func @annotated_kernel
module attributes {gpu.container_module} {
  gpu.module @gpu_module {
    gpu.func @annotated_kernel() kernel
      attributes {known_block_size = array<i32: 8, 12, 16>,
          known_grid_size = array<i32: 20, 24, 28>} {

      %grid_dim_x = gpu.grid_dim x
      %grid_dim_y = gpu.grid_dim y
      %grid_dim_z = gpu.grid_dim z

      // CHECK: test.reflect_bounds {smax = 20 : index, smin = 20 : index, umax = 20 : index, umin = 20 : index}
      // CHECK: test.reflect_bounds {smax = 24 : index, smin = 24 : index, umax = 24 : index, umin = 24 : index}
      // CHECK: test.reflect_bounds {smax = 28 : index, smin = 28 : index, umax = 28 : index, umin = 28 : index}
      %grid_dim_x0 = test.reflect_bounds %grid_dim_x : index
      %grid_dim_y0 = test.reflect_bounds %grid_dim_y : index
      %grid_dim_z0 = test.reflect_bounds %grid_dim_z : index

      %block_id_x = gpu.block_id x
      %block_id_y = gpu.block_id y
      %block_id_z = gpu.block_id z

      // CHECK: test.reflect_bounds {smax = 19 : index, smin = 0 : index, umax = 19 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 23 : index, smin = 0 : index, umax = 23 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 27 : index, smin = 0 : index, umax = 27 : index, umin = 0 : index}
      %block_id_x0 = test.reflect_bounds %block_id_x : index
      %block_id_y0 = test.reflect_bounds %block_id_y : index
      %block_id_z0 = test.reflect_bounds %block_id_z : index

      %block_dim_x = gpu.block_dim x
      %block_dim_y = gpu.block_dim y
      %block_dim_z = gpu.block_dim z

      // CHECK: test.reflect_bounds {smax = 8 : index, smin = 8 : index, umax = 8 : index, umin = 8 : index}
      // CHECK: test.reflect_bounds {smax = 12 : index, smin = 12 : index, umax = 12 : index, umin = 12 : index}
      // CHECK: test.reflect_bounds {smax = 16 : index, smin = 16 : index, umax = 16 : index, umin = 16 : index}
      %block_dim_x0 = test.reflect_bounds %block_dim_x : index
      %block_dim_y0 = test.reflect_bounds %block_dim_y : index
      %block_dim_z0 = test.reflect_bounds %block_dim_z : index

      %thread_id_x = gpu.thread_id x
      %thread_id_y = gpu.thread_id y
      %thread_id_z = gpu.thread_id z

      // CHECK: test.reflect_bounds {smax = 7 : index, smin = 0 : index, umax = 7 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 11 : index, smin = 0 : index, umax = 11 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 15 : index, smin = 0 : index, umax = 15 : index, umin = 0 : index}
      %thread_id_x0 = test.reflect_bounds %thread_id_x : index
      %thread_id_y0 = test.reflect_bounds %thread_id_y : index
      %thread_id_z0 = test.reflect_bounds %thread_id_z : index

      %global_id_x = gpu.global_id x
      %global_id_y = gpu.global_id y
      %global_id_z = gpu.global_id z

      // Expected maxima equal block_size * grid_size - 1 per dimension:
      // 8 * 20 - 1 = 159, 12 * 24 - 1 = 287, 16 * 28 - 1 = 447.
      // CHECK: test.reflect_bounds {smax = 159 : index, smin = 0 : index, umax = 159 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 287 : index, smin = 0 : index, umax = 287 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 447 : index, smin = 0 : index, umax = 447 : index, umin = 0 : index}
      %global_id_x0 = test.reflect_bounds %global_id_x : index
      %global_id_y0 = test.reflect_bounds %global_id_y : index
      %global_id_z0 = test.reflect_bounds %global_id_z : index

      %subgroup_size = gpu.subgroup_size : index
      %lane_id = gpu.lane_id
      %num_subgroups = gpu.num_subgroups : index
      %subgroup_id = gpu.subgroup_id : index

      // The subgroup expectations are the same as in the unannotated kernel
      // case; order is subgroup_size, lane_id, num_subgroups, subgroup_id.
      // CHECK: test.reflect_bounds {smax = 128 : index, smin = 1 : index, umax = 128 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 127 : index, smin = 0 : index, umax = 127 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 4294967295 : index, smin = 1 : index, umax = 4294967295 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 4294967294 : index, smin = 0 : index, umax = 4294967294 : index, umin = 0 : index}
      %subgroup_size0 = test.reflect_bounds %subgroup_size : index
      %lane_id0 = test.reflect_bounds %lane_id : index
      %num_subgroups0 = test.reflect_bounds %num_subgroups : index
      %subgroup_id0 = test.reflect_bounds %subgroup_id : index

      gpu.return
    }
  }
}

// -----

// Case 4: a plain `func.func` carrying the dialect-prefixed
// `gpu.known_block_size` / `gpu.known_grid_size` attributes. Block and thread
// ids are expected to get the same bounds as in the `gpu.func` case.
// CHECK-LABEL: func @annotated_kernel
module {
  func.func @annotated_kernel()
    attributes {gpu.known_block_size = array<i32: 8, 12, 16>,
        gpu.known_grid_size = array<i32: 20, 24, 28>} {

    %block_id_x = gpu.block_id x
    %block_id_y = gpu.block_id y
    %block_id_z = gpu.block_id z

    // CHECK: test.reflect_bounds {smax = 19 : index, smin = 0 : index, umax = 19 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 23 : index, smin = 0 : index, umax = 23 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 27 : index, smin = 0 : index, umax = 27 : index, umin = 0 : index}
    %block_id_x0 = test.reflect_bounds %block_id_x : index
    %block_id_y0 = test.reflect_bounds %block_id_y : index
    %block_id_z0 = test.reflect_bounds %block_id_z : index

    %thread_id_x = gpu.thread_id x
    %thread_id_y = gpu.thread_id y
    %thread_id_z = gpu.thread_id z

    // CHECK: test.reflect_bounds {smax = 7 : index, smin = 0 : index, umax = 7 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 11 : index, smin = 0 : index, umax = 11 : index, umin = 0 : index}
    // CHECK: test.reflect_bounds {smax = 15 : index, smin = 0 : index, umax = 15 : index, umin = 0 : index}
    %thread_id_x0 = test.reflect_bounds %thread_id_x : index
    %thread_id_y0 = test.reflect_bounds %thread_id_y : index
    %thread_id_z0 = test.reflect_bounds %thread_id_z : index

    return
  }
}

// -----

// Case 5: per-op `upper_bound` annotations on an otherwise unannotated
// kernel. Size-like ops are expected to range over [1, upper_bound] and
// id-like ops over [0, upper_bound - 1].
// CHECK-LABEL: func @local_bounds_kernel
module attributes {gpu.container_module} {
  gpu.module @gpu_module {
    gpu.func @local_bounds_kernel() kernel {

      %grid_dim_x = gpu.grid_dim x upper_bound 20
      %grid_dim_y = gpu.grid_dim y upper_bound 24
      %grid_dim_z = gpu.grid_dim z upper_bound 28

      // CHECK: test.reflect_bounds {smax = 20 : index, smin = 1 : index, umax = 20 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 24 : index, smin = 1 : index, umax = 24 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 28 : index, smin = 1 : index, umax = 28 : index, umin = 1 : index}
      %grid_dim_x0 = test.reflect_bounds %grid_dim_x : index
      %grid_dim_y0 = test.reflect_bounds %grid_dim_y : index
      %grid_dim_z0 = test.reflect_bounds %grid_dim_z : index

      %block_id_x = gpu.block_id x upper_bound 20
      %block_id_y = gpu.block_id y upper_bound 24
      %block_id_z = gpu.block_id z upper_bound 28

      // CHECK: test.reflect_bounds {smax = 19 : index, smin = 0 : index, umax = 19 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 23 : index, smin = 0 : index, umax = 23 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 27 : index, smin = 0 : index, umax = 27 : index, umin = 0 : index}
      %block_id_x0 = test.reflect_bounds %block_id_x : index
      %block_id_y0 = test.reflect_bounds %block_id_y : index
      %block_id_z0 = test.reflect_bounds %block_id_z : index

      %block_dim_x = gpu.block_dim x upper_bound 8
      %block_dim_y = gpu.block_dim y upper_bound 12
      %block_dim_z = gpu.block_dim z upper_bound 16

      // CHECK: test.reflect_bounds {smax = 8 : index, smin = 1 : index, umax = 8 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 12 : index, smin = 1 : index, umax = 12 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 16 : index, smin = 1 : index, umax = 16 : index, umin = 1 : index}
      %block_dim_x0 = test.reflect_bounds %block_dim_x : index
      %block_dim_y0 = test.reflect_bounds %block_dim_y : index
      %block_dim_z0 = test.reflect_bounds %block_dim_z : index

      %thread_id_x = gpu.thread_id x upper_bound 8
      %thread_id_y = gpu.thread_id y upper_bound 12
      %thread_id_z = gpu.thread_id z upper_bound 16

      // CHECK: test.reflect_bounds {smax = 7 : index, smin = 0 : index, umax = 7 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 11 : index, smin = 0 : index, umax = 11 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 15 : index, smin = 0 : index, umax = 15 : index, umin = 0 : index}
      %thread_id_x0 = test.reflect_bounds %thread_id_x : index
      %thread_id_y0 = test.reflect_bounds %thread_id_y : index
      %thread_id_z0 = test.reflect_bounds %thread_id_z : index

      %global_id_x = gpu.global_id x upper_bound 160
      %global_id_y = gpu.global_id y upper_bound 288
      %global_id_z = gpu.global_id z upper_bound 448

      // CHECK: test.reflect_bounds {smax = 159 : index, smin = 0 : index, umax = 159 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 287 : index, smin = 0 : index, umax = 287 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 447 : index, smin = 0 : index, umax = 447 : index, umin = 0 : index}
      %global_id_x0 = test.reflect_bounds %global_id_x : index
      %global_id_y0 = test.reflect_bounds %global_id_y : index
      %global_id_z0 = test.reflect_bounds %global_id_z : index

      %subgroup_size = gpu.subgroup_size upper_bound 32 : index
      %subgroup_id = gpu.subgroup_id upper_bound 32 : index
      %num_subgroups = gpu.num_subgroups upper_bound 8 : index
      %lane_id = gpu.lane_id upper_bound 64

      // Expectations below follow the reflect order: subgroup_size,
      // subgroup_id, num_subgroups, lane_id.
      // CHECK: test.reflect_bounds {smax = 32 : index, smin = 1 : index, umax = 32 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 31 : index, smin = 0 : index, umax = 31 : index, umin = 0 : index}
      // CHECK: test.reflect_bounds {smax = 8 : index, smin = 1 : index, umax = 8 : index, umin = 1 : index}
      // CHECK: test.reflect_bounds {smax = 63 : index, smin = 0 : index, umax = 63 : index, umin = 0 : index}
      %subgroup_size0 = test.reflect_bounds %subgroup_size : index
      %subgroup_id0 = test.reflect_bounds %subgroup_id : index
      %num_subgroups0 = test.reflect_bounds %num_subgroups : index
      %lane_id0 = test.reflect_bounds %lane_id : index

      gpu.return
    }
  }
}
