/llvm-project/mlir/test/Dialect/GPU/

ops.mlir:
    module attributes {gpu.container_module} {
    // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
    // CHECK: gpu.terminator
    gpu.terminator
    // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %blk, %grid_y = %blk, %grid_z = %blk)
    // CHECK: gpu.terminator
    gpu.terminator
    // CHECK: gpu
    [all …]

sparse-roundtrip.mlir:
    module attributes {gpu.container_module} {
    // CHECK: %{{.*}} = gpu.wait async
    // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
    // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
    // CHECK: %{{.*}}, %{{.*}} = gpu.create_coo async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{…
    // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}} : index into me…
    // CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} into f…
    // CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> in…
    // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
    // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
    [all …]

invalid.mlir:
    "gpu.launch"(%sz, %sz, %sz, %sz, %sz) ({
    gpu.return
    "gpu.launch"(%sz, %sz, %sz, %sz, %sz, %sz) ({
    gpu.terminator
    // @expected-note@+1 {{in 'gpu.launch' body region}}
    gpu.launch blocks(%bx, %by, %bz) in (%sbx = %sz, %sby = %sz, %sbz = %sz)
    // @expected-error@+2 {{expected 'gpu.terminator' or a terminator with successors}}
    "gpu.yield"(%one) : (i32) -> ()
    "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
    // expected-error@+1 {{expected the closest surrounding module to have the 'gpu
    [all …]

async-region.mlir:
    // RUN: mlir-opt -gpu-async-region %s | FileCheck %s
    // CHECK: module attributes {gpu.container_module}
    module attributes {gpu.container_module} {
    gpu.module @kernels {
    gpu.func @kernel() kernel { gpu.return }
    // CHECK: %[[t0:.*]] = gpu.wait async
    // CHECK: %[[t1:.*]] = gpu.launch_func async [%[[t0]]]
    gpu.launch_func @kernels::@kernel
    // CHECK: %[[t2:.*]] = gpu.launch_func async [%[[t1]]]
    gpu.launch_func @kernels::@kernel
    [all …]

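For context, a minimal sketch of the rewrite this -gpu-async-region test exercises (the kernel module mirrors the excerpt above; the host function name is invented): blocking launches get chained through async tokens.

    module attributes {gpu.container_module} {
      gpu.module @kernels {
        gpu.func @kernel() kernel { gpu.return }
      }
      func.func @two_launches() {
        %c1 = arith.constant 1 : index
        // Two blocking launches, executed back to back on the host thread.
        gpu.launch_func @kernels::@kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)
        gpu.launch_func @kernels::@kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)
        return
      }
    }
    // After -gpu-async-region the launches are chained through async tokens, roughly:
    //   %t0 = gpu.wait async
    //   %t1 = gpu.launch_func async [%t0] @kernels::@kernel ...
    //   %t2 = gpu.launch_func async [%t1] @kernels::@kernel ...
    //   gpu.wait [%t2]
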
canonicalize.mlir:
    // Fold all the gpu.wait ops as they are redundant.
    %1 = gpu.wait async
    gpu.wait []
    %3 = gpu.wait async
    gpu.wait [%3]
    // CHECK-NOT: gpu.wait
    // CHECK-NEXT: gpu.barrier
    gpu.barrier
    gpu.barrier
    // Replace uses of gpu
    [all …]

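To make the folding concrete, here is a minimal, self-contained function (the function name is invented; the ops mirror the excerpt above) on which the gpu.wait canonicalization removes every wait:

    func.func @redundant_waits() {
      // A token produced by an async wait and consumed only by another wait:
      // the producer/consumer pair folds away.
      %t = gpu.wait async
      gpu.wait [%t]
      // A wait with an empty dependency list waits on nothing and is erased.
      gpu.wait []
      return
    }
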
outlining.mlir:
    // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
    // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s
    // CHECK: module attributes {gpu.container_module}
    // CHECK: gpu.launch_func @launch_kernel::@launch_kernel blocks in (%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]]) threads in (%[[BDIMX]], %[[BDIMY]], %[[BDIMZ]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
    // CHECK-NOT: gpu.launch blocks
    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
    gpu.terminator
    // CHECK-DL-LABEL: gpu
    [all …]

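For orientation, a minimal sketch of the transformation behind this test (the function name and the "some.op" payload are placeholders, not taken from the test): -gpu-kernel-outlining moves the body of an inline gpu.launch into a gpu.func marked kernel inside a fresh gpu.module and replaces the launch site with a gpu.launch_func.

    func.func @main(%arg0: memref<?xf32>) {
      %c1 = arith.constant 1 : index
      // Inline launch region; "some.op" stands in for arbitrary payload
      // (the test uses -allow-unregistered-dialect for the same purpose).
      gpu.launch blocks(%bx, %by, %bz) in (%gx = %c1, %gy = %c1, %gz = %c1)
                 threads(%tx, %ty, %tz) in (%bw = %c1, %bh = %c1, %bd = %c1) {
        "some.op"(%arg0) : (memref<?xf32>) -> ()
        gpu.terminator
      }
      return
    }
    // After outlining, roughly:
    //   gpu.module @main_kernel {
    //     gpu.func @main_kernel(%arg0: memref<?xf32>) kernel { ... gpu.return }
    //   }
    //   gpu.launch_func @main_kernel::@main_kernel
    //     blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)
    //     args(%arg0 : memref<?xf32>)
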
subgroup-redule-lowering.mlir

int-range-interface.mlir:
    gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %0, %grid_dim_y = %1, %…
    %thread_id_op = gpu.thread_id y
    gpu.terminator
    module attributes {gpu.container_module} {
    gpu.module @gpu_module {
    llvm.func @kernel() attributes {gpu.kernel} {
    %grid_dim_x = gpu.grid_dim x
    %grid_dim_y = gpu.grid_dim y
    %grid_dim_z = gpu.grid_dim z
    %block_id_x = gpu.block_id x
    [all …]

/llvm-project/mlir/test/Conversion/GPUToSPIRV/

reductions.mlir:
    // RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s
    gpu.container_module,
    gpu.module @kernels {
    gpu.func @test(%arg : f32) kernel
    %reduced = gpu.all_reduce add %arg uniform {} : (f32) -> (f32)
    gpu.return
    gpu.container_module,
    gpu.module @kernels {
    gpu.func @test(%arg : f32) kernel
    %reduced = gpu
    [all …]

wmma-ops-to-spirv-khr-coop-matrix.mlir:
    // RUN: mlir-opt --convert-gpu-to-spirv --cse \
    gpu.container_module,
    gpu.module @kernels {
    gpu.func @gpu_wmma_load_op(%arg0 : memref<32x32xf16, #spirv.storage_class<StorageBuffer>>) kernel
    %0 = gpu.subgroup_mma_load_matrix %arg0[%i, %j] {leadDimension = 32 : index} :
    memref<32x32xf16, #spirv.storage_class<StorageBuffer>> -> !gpu.mma_matrix<16x16xf16, "COp">
    %1 = gpu.subgroup_mma_load_matrix %arg0[%i, %j] {leadDimension = 32 : index, transpose} :
    memref<32x32xf16, #spirv.storage_class<StorageBuffer>> -> !gpu.mma_matrix<16x16xf16, "COp">
    gpu.return
    gpu.func @gpu_wmma_store_op(%arg0: memref<32x32xf16, #spirv.storage_class<StorageBuffer>>,
    [all …]

builtins-vulkan.mlir:
    // RUN: mlir-opt -split-input-file -convert-gpu-to-spirv="use-64bit-index=false" %s -o - | FileChec…
    // RUN: mlir-opt -split-input-file -convert-gpu-to-spirv="use-64bit-index=true" %s -o - | FileCheck…
    gpu.container_module,
    gpu.launch_func @kernels::@builtin_workgroup_id_x
    gpu.module @kernels {
    gpu.func @builtin_workgroup_id_x() kernel
    %0 = gpu.block_id x
    gpu.return
    gpu.container_module,
    gpu.launch_func @kernels::@builtin_workgroup_id_y
    [all …]

/llvm-project/mlir/test/Dialect/SparseTensor/GPU/

gpu_combi.mlir:
    // RUN: --sparse-gpu-codegen | FileCheck %s
    // CHECK-LABEL: gpu.module @sparse_kernels
    // CHECK: gpu.func @kernel1
    // CHECK: gpu.func @kernel0
    // CHECK: gpu.alloc async
    // CHECK: gpu.memcpy async
    // CHECK: gpu.alloc async
    // CHECK: gpu.memcpy async
    // CHECK: gpu.alloc async
    // CHECK: gpu.memcpy async
    [all …]

gpu_spgemm_lib.mlir:
    // RUN: mlir-opt %s --linalg-generalize-named-ops --sparse-gpu-codegen="num-threads=0" | FileCheck …
    // CHECK: %[[VAL_14:.*]] = gpu.wait async
    // CHECK: %[[VAL_16:.*]], %[[VAL_17:.*]] = gpu.alloc async {{\[}}%[[VAL_14]]] (%[[VAL_15]…
    // CHECK: %[[VAL_18:.*]] = gpu.memcpy async {{\[}}%[[VAL_17]]] %[[VAL_16]], %[[VAL_8]] : …
    // CHECK: %[[VAL_19:.*]] = gpu.wait async
    // CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = gpu.alloc async {{\[}}%[[VAL_19]]] (%[[VAL_20]…
    // CHECK: %[[VAL_23:.*]] = gpu.memcpy async {{\[}}%[[VAL_22]]] %[[VAL_21]], %[[VAL_9]] : …
    // CHECK: %[[VAL_24:.*]] = gpu.wait async
    // CHECK: %[[VAL_26:.*]], %[[VAL_27:.*]] = gpu.alloc async {{\[}}%[[VAL_24]]] (%[[VAL_25]…
    // CHECK: %[[VAL_28:.*]] = gpu.memcpy async {{\[}}%[[VAL_27]]] %[[VAL_26]], %[[VAL_10]] :…
    [all …]

gpu_matvec.mlir:
    // RUN: --sparse-gpu-codegen | FileCheck %s
    // CHECK-LABEL: gpu.module @sparse_kernels
    // CHECK: gpu.func @kernel0(
    // CHECK: %[[VAL_7:.*]] = gpu.block_id x
    // CHECK: %[[VAL_8:.*]] = gpu.block_dim x
    // CHECK: %[[VAL_9:.*]] = gpu.thread_id x
    // CHECK: %[[VAL_10:.*]] = gpu.grid_dim x
    // CHECK: gpu.return
    // CHECK: gpu.wait async
    // CHECK: gpu.alloc async
    [all …]

gpu_matmul.mlir:
    // RUN: --sparse-gpu-codegen | FileCheck %s
    // CHECK-LABEL: gpu.module @sparse_kernels
    // CHECK-LABEL: gpu.func @kernel0(
    // CHECK: %[[VAL_9:.*]] = gpu.block_id x
    // CHECK: %[[VAL_10:.*]] = gpu.block_dim x
    // CHECK: %[[VAL_11:.*]] = gpu.thread_id x
    // CHECK: %[[VAL_12:.*]] = gpu.grid_dim x
    // CHECK: gpu.return
    // CHECK: gpu.wait async
    // CHECK: gpu.alloc async
    [all …]

gpu_matvec_lib.mlir:
    // RUN: mlir-opt %s --linalg-generalize-named-ops --sparse-gpu-codegen="num-threads=0" | FileCheck %s
    // CHECK: %[[VAL_11:.*]] = gpu.wait async
    // CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = gpu.alloc async {{\[}}%[[VAL_11]]] (%[[VAL_12]]) : memref<?xindex>
    // CHECK: %[[VAL_15:.*]] = gpu.memcpy async {{\[}}%[[VAL_14]]] %[[VAL_13]], %[[VAL_8]] : memref<?xindex>, memref<?xindex, strided<[?], offset: ?>>
    // CHECK: %[[VAL_16:.*]] = gpu.wait async
    // CHECK: %[[VAL_18:.*]], %[[VAL_19:.*]] = gpu.alloc async {{\[}}%[[VAL_16]]] (%[[VAL_17]]) : memref<?xindex>
    // CHECK: %[[VAL_20:.*]] = gpu.memcpy async {{\[}}%[[VAL_19]]] %[[VAL_18]], %[[VAL_9]] : memref<?xindex>, memref<?xindex, strided<[?], offset: ?>>
    // CHECK: %[[VAL_21:.*]] = gpu.wait async
    // CHECK: %[[VAL_23:.*]], %[[VAL_24:.*]] = gpu.alloc async {{\[}}%[[VAL_21]]] (%[[VAL_22]]) : memref<?xf64>
    // CHECK: %[[VAL_25:.*]] = gpu
    [all …]

/llvm-project/libc/test/integration/src/__support/GPU/

scan_reduce.cpp:
    uint64_t mask = gpu::get_lane_mask();                                 // in test_reduce()
    uint32_t x = gpu::reduce(mask, 1);                                    // in test_reduce()
    EXPECT_EQ(x, gpu::get_lane_size());                                   // in test_reduce()
    uint32_t y = gpu::reduce(mask, gpu::get_lane_id());                   // in test_reduce()
    EXPECT_EQ(y, sum(gpu::get_lane_size() - 1));                          // in test_reduce()
    if (gpu::get_lane_id() % 2)                                           // in test_reduce()
      z = gpu::reduce(gpu::get_lane_mask(), 1);                           // in test_reduce()
    gpu::sync_lane(mask);                                                 // in test_reduce()
    EXPECT_EQ(z, gpu::get_lane_id() % 2 ? gpu::get_lane_size() / 2 : 0);  // in test_reduce()
    uint64_t mask = gpu::get_lane_mask();                                 // in test_scan()
    [all …]

/llvm-project/mlir/test/Conversion/GPUCommon/

lower-memory-space-attrs.mlir:
    // RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=CHECK,R…
    // RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=CHECK,NV…
    gpu.module @kernel {
    gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, #gpu.address_space<private>>) {
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<private>>
    gpu.return
    gpu.module @kernel {
    gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, #gpu.address_space<workgroup>>) {
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<workgroup>>
    gpu.return
    [all …]

lower-sparse-to-gpu-runtime-calls.mlir:
    // RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
    module attributes {gpu.container_module} {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %spmat, %token4 = gpu.create_coo async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref…
    %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0 : index into memref<?xf64>
    %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %spmat, %dnvec, %dnvec into f64
    %token7 = gpu.spmv async [%token6] %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    [all …]

/llvm-project/mlir/lib/Conversion/GPUToNVVM/

LowerGpuOpsToNVVMOps.cpp:
    /// Convert gpu dialect shfl mode enum to the equivalent nvvm one.
    static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) {
      case gpu::ShuffleMode::XOR:
      case gpu::ShuffleMode::UP:
      case gpu::ShuffleMode::DOWN:
      case gpu::ShuffleMode::IDX:
    convertReduxKind(gpu::AllReduceOperation mode) {
      case gpu::AllReduceOperation::ADD:
      case gpu::AllReduceOperation::MUL:
      case gpu
    [all …]

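These helpers map gpu.shuffle modes and gpu.all_reduce operations to their NVVM counterparts during -convert-gpu-to-nvvm. For reference, the four shuffle modes look like this on the gpu-dialect side (an illustrative snippet, not taken from this file; the value names are made up):

    %r0, %valid0 = gpu.shuffle xor  %val, %offset, %width : f32
    %r1, %valid1 = gpu.shuffle up   %val, %offset, %width : f32
    %r2, %valid2 = gpu.shuffle down %val, %offset, %width : f32
    %r3, %valid3 = gpu.shuffle idx  %val, %offset, %width : f32
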
/llvm-project/mlir/test/Conversion/VectorToGPU/

vector-to-mma-ops.mlir:
    // RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-gpu),canonicalize)" --split-input-file | FileCheck %s
    // CHECK-DAG: %[[A:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "AOp">
    // CHECK-DAG: %[[B:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "BOp">
    // CHECK-DAG: %[[C:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "COp">
    // CHECK: %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu
    [all …]

/llvm-project/mlir/lib/Conversion/GPUCommon/

GPUOpsLowering.h:
    LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
    gpu::GPUModuleOp moduleOp, Type llvmI8,
    /// Lowering for gpu.dynamic.shared.memory to LLVM dialect. The pattern first
    : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
    gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
    : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter),
    matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
    /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
    struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
    : ConvertOpToLLVMPattern<gpu
    [all …]

/llvm-project/mlir/test/Dialect/XeGPU/

XeGPUOps.mlir:
    // CHECK-LABEL: gpu.module @test {
    gpu.module @test {
    // CHECK: gpu.func @test_create_nd_tdesc_vc_1(%[[arg0:.*]]: memref<24x32xf32>) {
    gpu.func @test_create_nd_tdesc_vc_1(%src: memref<24x32xf32>) {
    gpu.return
    // CHECK: gpu.func @test_create_nd_tdesc_with_sg_map(%[[arg0:.*]]: memref<24x32xf32>) {
    gpu.func @test_create_nd_tdesc_with_sg_map(%src: memref<24x32xf32>) {
    gpu.return
    // CHECK: gpu.func @test_create_nd_tdesc_vc_2(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) {
    gpu
    [all …]

/llvm-project/mlir/test/Conversion/GPUToNVVM/

gpu-to-nvvm-32b.mlir:
    // RUN: mlir-opt %s -convert-gpu-to-nvvm='index-bitwidth=32' -split-input-file | FileCheck %s
    gpu.module @test_module_0 {
    %tIdX = gpu.thread_id x
    %tIdY = gpu.thread_id y
    %tIdZ = gpu.thread_id z
    %bDimX = gpu.block_dim x
    %bDimY = gpu.block_dim y
    %bDimZ = gpu.block_dim z
    %bIdX = gpu.block_id x
    %bIdY = gpu
    [all …]

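For context, a rough sketch of what -convert-gpu-to-nvvm with index-bitwidth=32 does to one of these ops (the module and function names are invented, and the lowered form is paraphrased in the comment rather than copied from the test):

    gpu.module @ids {
      gpu.func @tid() kernel {
        %tid = gpu.thread_id x   // yields an index value
        gpu.return
      }
    }
    // The thread-id op lowers to the NVVM intrinsic read, roughly
    //   %0 = nvvm.read.ptx.sreg.tid.x : i32
    // and with a 32-bit index type the i32 result is used directly, without the
    // widening to i64 that the default 64-bit-index lowering inserts.
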
/llvm-project/mlir/test/Integration/GPU/SYCL/

gpu-reluf32-to-spirv.mlir:
    // RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
    module @relu attributes {gpu.container_module} {
    %memref = gpu.alloc host_shared () : memref<4x5xf32>
    %memref_0 = gpu.alloc host_shared () : memref<4x5xi1>
    %2 = gpu.wait async
    %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>)
    gpu
    [all …]