Lines Matching refs:gpu
1 // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
2 // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s
4 // CHECK: module attributes {gpu.container_module}
25 // CHECK: gpu.launch_func @launch_kernel::@launch_kernel blocks in (%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]]) threads in (%[[BDIMX]], %[[BDIMY]], %[[BDIMZ]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
26 // CHECK-NOT: gpu.launch blocks
27 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
34 gpu.terminator
39 // CHECK-DL-LABEL: gpu.module @launch_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
40 // CHECK-LABEL: gpu.module @launch_kernel
41 // CHECK-NEXT: gpu.func @launch_kernel
45 // CHECK-NEXT: %[[BID:.*]] = gpu.block_id x
46 // CHECK-NEXT: = gpu.block_id y
47 // CHECK-NEXT: = gpu.block_id z
48 // CHECK-NEXT: %[[TID:.*]] = gpu.thread_id x
49 // CHECK-NEXT: = gpu.thread_id y
50 // CHECK-NEXT: = gpu.thread_id z
51 // CHECK-NEXT: = gpu.grid_dim x
52 // CHECK-NEXT: = gpu.grid_dim y
53 // CHECK-NEXT: = gpu.grid_dim z
54 // CHECK-NEXT: %[[BDIM:.*]] = gpu.block_dim x
55 // CHECK-NEXT: = gpu.block_dim y
56 // CHECK-NEXT: = gpu.block_dim z
64 // CHECK-LABEL: gpu.func @launchCFG_kernel(
66 // CHECK: gpu.return
77 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
86 gpu.terminator
94 // This test checks that gpu-kernel-outlining can handle a gpu.launch kernel inside an llvm.func
105 // CHECK: gpu.launch_func @launch_from_llvm_func_kernel::@launch_from_llvm_func_kernel
110 // CHECK: gpu.func {{.*}} kernel attributes
113 // CHECK: gpu.return
114 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %dim, %grid_y = %dim,
121 gpu.terminator
126 // CHECK-DL-LABEL: gpu.module @launch_from_llvm_func_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
130 // CHECK: module attributes {gpu.container_module}
135 // CHECK: gpu.launch_func @multiple_launches_kernel::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
136 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
140 gpu.terminator
142 // CHECK: gpu.launch_func @multiple_launches_kernel_0::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
143 gpu.launch blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst,
147 gpu.terminator
151 // CHECK: %[[TOKEN:.*]] = gpu.wait async
152 // CHECK: gpu.launch_func async [%[[TOKEN]]] @multiple_launches_kernel_1::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
153 %t = gpu.wait async
154 %u = gpu.launch async [%t] blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst,
158 gpu.terminator
161 // CHECK: gpu.launch_func async @multiple_launches_kernel_2::@multiple_launches_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]])
162 %v = gpu.launch async blocks(%bx2, %by2, %bz2) in (%grid_x2 = %cst, %grid_y2 = %cst,
166 gpu.terminator
172 // CHECK-DL-LABEL: gpu.module @multiple_launches_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
173 // CHECK-DL-LABEL: gpu.module @multiple_launches_kernel_0 attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
175 // CHECK: gpu.module @multiple_launches_kernel
189 // CHECK: gpu.launch_func @extra_constants_not_inlined_kernel::@extra_constants_not_inlined_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args({{.*}} : memref<?xf32>, {{.*}} : index)
190 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
195 gpu.terminator
200 // CHECK-DL-LABEL: gpu.module @extra_constants_not_inlined_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
215 // CHECK: gpu.launch_func @extra_constants_kernel::@extra_constants_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref<?xf32>)
216 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
221 gpu.terminator
226 // CHECK-DL-LABEL: gpu.module @extra_constants_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
245 // CHECK: gpu.launch_func @extra_constants_noarg_kernel::@extra_constants_noarg_kernel blocks in (%[[CST]], %[[CST]], %[[CST]]) threads in (%[[CST]], %[[CST]], %[[CST]]) args(%[[ARG0]] : memref<?xf32>, {{.*}} : index)
246 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
251 gpu.terminator
256 // CHECK-DL-LABEL: gpu.module @extra_constants_noarg_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
269 // CHECK: gpu.func {{.*}} {
273 // CHECK: gpu.return
275 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1,
281 gpu.terminator
286 // CHECK-DL-LABEL: gpu.module @multiple_uses_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
295 // CHECK: gpu.func {{.*}} {
301 // CHECK: gpu.return
303 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1,
310 gpu.terminator
315 // CHECK-DL-LABEL: gpu.module @multiple_uses2_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
324 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %cst, %grid_y = %cst,
331 gpu.terminator
346 // CHECK-DL-LABEL: gpu.module @function_call_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
348 // CHECK: gpu.module @function_call_kernel {
349 // CHECK: gpu.func @function_call_kernel()
353 // CHECK: gpu.return
367 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %arg0, %grid_y = %arg0,
371 gpu.terminator
376 // CHECK-DL-LABEL: gpu.module @non_constant_launches_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
378 // CHECK: module attributes {gpu.container_module}
382 // This test checks memory attributions for gpu.launch, using both workgroup and private attributions.
388 // CHECK: gpu.launch_func @launch_memory_attributions_0_kernel::@launch_memory_attributions_0_kernel
389 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %128, %grid_y = %128,
399 gpu.terminator
404 // CHECK-DL-LABEL: gpu.module @launch_memory_attributions_0_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
406 // CHECK-LABEL: gpu.module @launch_memory_attributions_0_kernel
407 // CHECK-NEXT: gpu.func @launch_memory_attributions_0_kernel
410 // CHECK: %[[TID:.*]] = gpu.thread_id x
422 // CHECK: gpu.func {{.*}} private(%[[KERNEL_ARG:.*]] : memref<3xf32, 5>) {{.*}} {
425 // CHECK: gpu.return
427 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1,
433 gpu.terminator
438 // CHECK-DL-LABEL: gpu.module @launch_memory_attributions_1_kernel attributes {dlti.dl_spec = #dlti.dl_spec<index = 32 : i32>}
441 // CHECK: module attributes {gpu.container_module}
468 // CHECK: gpu.launch_func @launch_cluster_kernel::@launch_cluster_kernel clusters in (%[[CDIMX]], %[[CDIMY]], %[[CDIMZ]]) blocks in (%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]]) threads in (%[[BDIMX]], %[[BDIMY]], %[[BDIMZ]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
469 // CHECK-NOT: gpu.launch blocks
470 gpu.launch clusters(%cx, %cy, %cz) in (%cluster_x = %cDimX, %cluster_y = %cDimY,
479 gpu.terminator
484 // CHECK-LABEL: gpu.module @launch_cluster_kernel
485 // CHECK-NEXT: gpu.func @launch_cluster_kernel
489 // CHECK-NEXT: %[[BID:.*]] = gpu.block_id x
490 // CHECK-NEXT: = gpu.block_id y
491 // CHECK-NEXT: = gpu.block_id z
492 // CHECK-NEXT: %[[TID:.*]] = gpu.thread_id x
493 // CHECK-NEXT: = gpu.thread_id y
494 // CHECK-NEXT: = gpu.thread_id z
495 // CHECK-NEXT: = gpu.grid_dim x
496 // CHECK-NEXT: = gpu.grid_dim y
497 // CHECK-NEXT: = gpu.grid_dim z
498 // CHECK-NEXT: %[[BDIM:.*]] = gpu.block_dim x
499 // CHECK-NEXT: = gpu.block_dim y
500 // CHECK-NEXT: = gpu.block_dim z
501 // CHECK-NEXT: %[[CID:.*]] = gpu.cluster_id x
502 // CHECK-NEXT: = gpu.cluster_id y
503 // CHECK-NEXT: = gpu.cluster_id z
504 // CHECK-NEXT: %[[CDIM:.*]] = gpu.cluster_dim x
505 // CHECK-NEXT: = gpu.cluster_dim y
506 // CHECK-NEXT: = gpu.cluster_dim z
512 // This test checks the two optional attributes kernelModule and kernelFunc of gpu.launch
514 // CHECK: gpu.launch_func @test_module::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
515 // CHECK: gpu.module @test_module
516 // CHECK: gpu.func @test_kernel_func()
525 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
528 gpu.terminator
534 // This test checks the two optional attributes kernelModule and kernelFunc of gpu.launch when the module named by kernelModule already exists.
536 // CHECK-LABEL: gpu.module @existing_module
537 // CHECK: gpu.func @test_kernel_func()
538 // CHECK: gpu.func @test_kernel_func_0()
539 // CHECK-NOT: gpu.module @testExistingModule_kernel
540 // CHECK-NOT: gpu.func @testExistingModule_kernel()
542 // CHECK: gpu.launch_func @existing_module::@test_kernel_func_0 blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
544 gpu.module @existing_module {
545 gpu.func @test_kernel_func() {
546 gpu.return
558 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
561 gpu.terminator
567 // This test checks the optional attribute kernelModule of gpu.launch.
569 // CHECK: gpu.launch_func @test_module::@testKernelModuleOnly_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
570 // CHECK: gpu.module @test_module
571 // CHECK: gpu.func @testKernelModuleOnly_kernel()
580 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
583 gpu.terminator
589 // This test checks the optional attribute kernelFunc of gpu.launch.
591 // CHECK: gpu.launch_func @test_kernel_func::@test_kernel_func blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
593 // CHECK: gpu.module @test_kernel_func
594 // CHECK: gpu.func @test_kernel_func()
603 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
606 gpu.terminator
612 // This test checks gpu.launch when the optional attributes kernelModule and kernelFunc are not specified.
614 // CHECK: gpu.launch_func @testNoAttributes_kernel::@testNoAttributes_kernel blocks in (%[[GRID_X:.*]], %[[GRID_Y:.*]], %[[GRID_Z:.*]]) threads in (%[[BLOCK_X:.*]], %[[BLOCK_Y:.*]], %[[BLOCK_Z:.*]])
616 // CHECK: gpu.module @testNoAttributes_kernel
617 // CHECK: gpu.func @testNoAttributes_kernel()
626 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY, %grid_z = %gDimZ)
629 gpu.terminator