
Searched refs:gpu (Results 1 – 25 of 439) sorted by relevance


/llvm-project/mlir/test/Dialect/GPU/
ops.mlir
7 module attributes {gpu.container_module} {
11 // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
12 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
14 // CHECK: gpu.terminator
15 gpu.terminator
22 // CHECK: gpu.launch blocks(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}) threads(%{{.*}}, %{{.*}}, %{{.*}}) in (%{{.*}} = %{{.*}}, %{{.*}} = %{{.*}}, %{{.*}} = %{{.*}})
23 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %blk, %grid_y = %blk, %grid_z = %blk)
27 // CHECK: gpu.terminator
28 gpu.terminator
35 // CHECK: gpu
[all...]
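
(For context: a minimal sketch, not taken from ops.mlir, of the complete gpu.launch form that the truncated CHECK lines above match; the function and value names are illustrative.)

module attributes {gpu.container_module} {
  func.func @launch_example(%sz : index) {
    gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %sz, %grid_y = %sz, %grid_z = %sz)
               threads(%tx, %ty, %tz) in (%block_x = %sz, %block_y = %sz, %block_z = %sz) {
      // Kernel body; the region must end with gpu.terminator.
      gpu.terminator
    }
    return
  }
}
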
sparse-roundtrip.mlir
3 module attributes {gpu.container_module} {
6 // CHECK: %{{.*}} = gpu.wait async
7 // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
8 // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
9 …// CHECK: %{{.*}}, %{{.*}} = gpu.create_coo async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{…
10 …// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}} : index into me…
11 …// CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} into f…
12 …// CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> in…
13 // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
14 // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
[all …]
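
(For context: a minimal sketch, assumed rather than copied from sparse-roundtrip.mlir, of the async token convention these CHECK lines rely on: each `async` op takes the previous token as a dependency and returns a new one, and a final gpu.wait joins the chain.)

func.func @token_chain(%n : index) {
  %t0 = gpu.wait async                                      // start of the chain
  %mem, %t1 = gpu.alloc async [%t0] (%n) : memref<?xf64>    // depends on %t0
  %t2 = gpu.dealloc async [%t1] %mem : memref<?xf64>        // depends on %t1
  gpu.wait [%t2]                                            // host-side join
  return
}
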
invalid.mlir
5 "gpu.launch"(%sz, %sz, %sz, %sz, %sz) ({
6 gpu.return
15 "gpu.launch"(%sz, %sz, %sz, %sz, %sz, %sz) ({
18 gpu.terminator
26 // @expected-note@+1 {{in 'gpu.launch' body region}}
27 gpu.launch blocks(%bx, %by, %bz) in (%sbx = %sz, %sby = %sz, %sbz = %sz)
29 // @expected-error@+2 {{expected 'gpu.terminator' or a terminator with successors}}
31 "gpu.yield"(%one) : (i32) -> ()
40 "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz)
49 // expected-error@+1 {{expected the closest surrounding module to have the 'gpu
[all...]
async-region.mlir
1 // RUN: mlir-opt -gpu-async-region %s | FileCheck %s
3 // CHECK: module attributes {gpu.container_module}
4 module attributes {gpu.container_module} {
6 gpu.module @kernels {
7 gpu.func @kernel() kernel { gpu.return }
14 // CHECK: %[[t0:.*]] = gpu.wait async
15 // CHECK: %[[t1:.*]] = gpu.launch_func async [%[[t0]]]
16 gpu.launch_func @kernels::@kernel
18 // CHECK: %[[t2:.*]] = gpu.launch_func async [%[[t1]]]
19 gpu.launch_func @kernels::@kernel
[all …]
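
(For context: a sketch, assumed, of the kind of input -gpu-async-region rewrites. The pass turns each synchronous gpu.launch_func into its async form and threads !gpu.async.token values between launches, which is what the CHECK lines above verify.)

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @kernel() kernel { gpu.return }
  }
  func.func @two_launches() {
    %c1 = arith.constant 1 : index
    // Both launches are synchronous before the pass runs.
    gpu.launch_func @kernels::@kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)
    gpu.launch_func @kernels::@kernel blocks in (%c1, %c1, %c1) threads in (%c1, %c1, %c1)
    return
  }
}
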
canonicalize.mlir
3 // Fold all the gpu.wait ops as they are redundant.
6 %1 = gpu.wait async
7 gpu.wait []
8 %3 = gpu.wait async
9 gpu.wait [%3]
12 // CHECK-NOT: gpu.wait
18 // CHECK-NEXT: gpu.barrier
21 gpu.barrier
22 gpu.barrier
28 // Replace uses of gpu
[all...]
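
(For context: a small sketch, assumed, of the redundancy being folded. A gpu.wait async with no dependencies, and a gpu.wait on such a token, impose no ordering, so canonicalization drops them, matching the CHECK-NOT above.)

func.func @redundant_waits() {
  %t = gpu.wait async   // token with no dependencies
  gpu.wait [%t]         // waits on nothing; folded away
  gpu.wait []           // empty dependency list; folded away
  return
}
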
outlining.mlir
1 // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining -split-input-file -verify-diagnostics %s | FileCheck %s
2 // RUN: mlir-opt -allow-unregistered-dialect -gpu-launch-sink-index-computations -gpu-kernel-outlining=data-layout-str='#dlti.dl_spec<#dlti.dl_entry<index,32:i32>>' -split-input-file %s | FileCheck --check-prefix CHECK-DL %s
4 // CHECK: module attributes {gpu.container_module}
25 // CHECK: gpu.launch_func @launch_kernel::@launch_kernel blocks in (%[[GDIMX]], %[[GDIMY]], %[[GDIMZ]]) threads in (%[[BDIMX]], %[[BDIMY]], %[[BDIMZ]]) args(%[[ARG0]] : f32, %[[ARG1]] : memref<?xf32, 1>)
26 // CHECK-NOT: gpu.launch blocks
27 gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %gDimX, %grid_y = %gDimY,
34 gpu.terminator
39 // CHECK-DL-LABEL: gpu
[all...]
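
(For context: a sketch, assumed, of the input shape -gpu-kernel-outlining handles. Values defined outside the gpu.launch body, here %arg0 and %buf, become kernel arguments, which is why the outlined gpu.launch_func above carries an args(...) clause.)

func.func @before_outlining(%arg0 : f32, %buf : memref<?xf32, 1>, %sz : index) {
  gpu.launch blocks(%bx, %by, %bz) in (%gx = %sz, %gy = %sz, %gz = %sz)
             threads(%tx, %ty, %tz) in (%bdx = %sz, %bdy = %sz, %bdz = %sz) {
    // Uses of values from the enclosing function are rewritten to kernel arguments.
    memref.store %arg0, %buf[%tx] : memref<?xf32, 1>
    gpu.terminator
  }
  return
}
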
subgroup-redule-lowering.mlir
int-range-interface.mlir
13 gpu.launch blocks(%block_id_x, %block_id_y, %block_id_z) in (%grid_dim_x = %0, %grid_dim_y = %1, %…
46 %thread_id_op = gpu.thread_id y
48 gpu.terminator
57 module attributes {gpu.container_module} {
58 gpu.module @gpu_module {
59 llvm.func @kernel() attributes {gpu.kernel} {
61 %grid_dim_x = gpu.grid_dim x
62 %grid_dim_y = gpu.grid_dim y
63 %grid_dim_z = gpu.grid_dim z
72 %block_id_x = gpu.block_id x
[all …]
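
(For context: a sketch, assumed, of the GPU index operations whose value ranges these tests constrain, combined into the usual global-index computation.)

func.func @global_index() -> index {
  %tid  = gpu.thread_id x   // bounded by the block size
  %bid  = gpu.block_id x    // bounded by the grid size
  %bdim = gpu.block_dim x
  %tmp  = arith.muli %bid, %bdim : index
  %gid  = arith.addi %tmp, %tid : index
  return %gid : index
}
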
/llvm-project/mlir/test/Conversion/GPUToSPIRV/
reductions.mlir
1 // RUN: mlir-opt -split-input-file -convert-gpu-to-spirv -verify-diagnostics %s -o - | FileCheck %s
4 gpu.container_module,
8 gpu.module @kernels {
11 gpu.func @test(%arg : f32) kernel
14 %reduced = gpu.all_reduce add %arg uniform {} : (f32) -> (f32)
15 gpu.return
24 gpu.container_module,
28 gpu.module @kernels {
31 gpu.func @test(%arg : f32) kernel
34 %reduced = gpu
[all...]
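
(For context: a sketch, assumed, of a minimal kernel using gpu.all_reduce as in the hits above; the real test additionally attaches SPIR-V target attributes that are omitted here. The uniform keyword asserts that every invocation in the subgroup reaches the reduction.)

module attributes {gpu.container_module} {
  gpu.module @kernels {
    gpu.func @sum(%arg : f32) kernel {
      %reduced = gpu.all_reduce add %arg uniform {} : (f32) -> (f32)
      gpu.return
    }
  }
}
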
wmma-ops-to-spirv-khr-coop-matrix.mlir
1 // RUN: mlir-opt --convert-gpu-to-spirv --cse \
5 gpu.container_module,
11 gpu.module @kernels {
14 gpu.func @gpu_wmma_load_op(%arg0 : memref<32x32xf16, #spirv.storage_class<StorageBuffer>>) kernel
21 %0 = gpu.subgroup_mma_load_matrix %arg0[%i, %j] {leadDimension = 32 : index} :
22 memref<32x32xf16, #spirv.storage_class<StorageBuffer>> -> !gpu.mma_matrix<16x16xf16, "COp">
26 %1 = gpu.subgroup_mma_load_matrix %arg0[%i, %j] {leadDimension = 32 : index, transpose} :
27 memref<32x32xf16, #spirv.storage_class<StorageBuffer>> -> !gpu.mma_matrix<16x16xf16, "COp">
29 gpu.return
35 gpu.func @gpu_wmma_store_op(%arg0: memref<32x32xf16, #spirv.storage_class<StorageBuffer>>,
[all …]
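
(For context: a sketch, assumed, of how the subgroup MMA ops seen above compose into a complete tile multiply: load the A, B and C fragments, multiply-accumulate, then store the result. The buffer and index names are illustrative.)

func.func @wmma(%buf : memref<32x32xf16>, %i : index, %j : index) {
  %A = gpu.subgroup_mma_load_matrix %buf[%i, %j] {leadDimension = 32 : index}
         : memref<32x32xf16> -> !gpu.mma_matrix<16x16xf16, "AOp">
  %B = gpu.subgroup_mma_load_matrix %buf[%i, %j] {leadDimension = 32 : index}
         : memref<32x32xf16> -> !gpu.mma_matrix<16x16xf16, "BOp">
  %C = gpu.subgroup_mma_load_matrix %buf[%i, %j] {leadDimension = 32 : index}
         : memref<32x32xf16> -> !gpu.mma_matrix<16x16xf16, "COp">
  %D = gpu.subgroup_mma_compute %A, %B, %C
         : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu.mma_matrix<16x16xf16, "BOp"> -> !gpu.mma_matrix<16x16xf16, "COp">
  gpu.subgroup_mma_store_matrix %D, %buf[%i, %j] {leadDimension = 32 : index}
         : !gpu.mma_matrix<16x16xf16, "COp">, memref<32x32xf16>
  return
}
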
builtins-vulkan.mlir
1 // RUN: mlir-opt -split-input-file -convert-gpu-to-spirv="use-64bit-index=false" %s -o - | FileChec…
2 // RUN: mlir-opt -split-input-file -convert-gpu-to-spirv="use-64bit-index=true" %s -o - | FileCheck…
5 gpu.container_module,
10 gpu.launch_func @kernels::@builtin_workgroup_id_x
19 gpu.module @kernels {
20 gpu.func @builtin_workgroup_id_x() kernel
26 %0 = gpu.block_id x
27 gpu.return
35 gpu.container_module,
41 gpu.launch_func @kernels::@builtin_workgroup_id_y
[all …]
/llvm-project/mlir/test/Dialect/SparseTensor/GPU/
gpu_combi.mlir
5 // RUN: --sparse-gpu-codegen | FileCheck %s
10 // CHECK-LABEL: gpu.module @sparse_kernels
11 // CHECK: gpu.func @kernel1
12 // CHECK: gpu.func @kernel0
15 // CHECK: gpu.alloc async
16 // CHECK: gpu.memcpy async
17 // CHECK: gpu.alloc async
18 // CHECK: gpu.memcpy async
19 // CHECK: gpu.alloc async
20 // CHECK: gpu.memcpy async
[all …]
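
(For context: a sketch, assumed, of the alloc/memcpy staging pattern that the repeated CHECK pairs above correspond to: device buffers are allocated and filled asynchronously before the outlined sparse kernel is launched.)

func.func @stage_to_device(%host : memref<?xf64>, %n : index) {
  %t0 = gpu.wait async
  %dev, %t1 = gpu.alloc async [%t0] (%n) : memref<?xf64>
  %t2 = gpu.memcpy async [%t1] %dev, %host : memref<?xf64>, memref<?xf64>
  gpu.wait [%t2]
  return
}
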
gpu_spgemm_lib.mlir
1 // RUN: mlir-opt %s --linalg-generalize-named-ops --sparse-gpu-codegen="num-threads=0" | FileCheck …
19 // CHECK: %[[VAL_14:.*]] = gpu.wait async
21 // CHECK: %[[VAL_16:.*]], %[[VAL_17:.*]] = gpu.alloc async {{\[}}%[[VAL_14]]] (%[[VAL_15]…
22 // CHECK: %[[VAL_18:.*]] = gpu.memcpy async {{\[}}%[[VAL_17]]] %[[VAL_16]], %[[VAL_8]] : …
23 // CHECK: %[[VAL_19:.*]] = gpu.wait async
25 // CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = gpu.alloc async {{\[}}%[[VAL_19]]] (%[[VAL_20]…
26 // CHECK: %[[VAL_23:.*]] = gpu.memcpy async {{\[}}%[[VAL_22]]] %[[VAL_21]], %[[VAL_9]] : …
27 // CHECK: %[[VAL_24:.*]] = gpu.wait async
29 // CHECK: %[[VAL_26:.*]], %[[VAL_27:.*]] = gpu.alloc async {{\[}}%[[VAL_24]]] (%[[VAL_25]…
30 // CHECK: %[[VAL_28:.*]] = gpu.memcpy async {{\[}}%[[VAL_27]]] %[[VAL_26]], %[[VAL_10]] :…
[all …]
gpu_matvec.mlir
5 // RUN: --sparse-gpu-codegen | FileCheck %s
12 // CHECK-LABEL: gpu.module @sparse_kernels
13 // CHECK: gpu.func @kernel0(
21 // CHECK: %[[VAL_7:.*]] = gpu.block_id x
22 // CHECK: %[[VAL_8:.*]] = gpu.block_dim x
23 // CHECK: %[[VAL_9:.*]] = gpu.thread_id x
24 // CHECK: %[[VAL_10:.*]] = gpu.grid_dim x
43 // CHECK: gpu.return
47 // CHECK: gpu.wait async
48 // CHECK: gpu.alloc async
[all …]
gpu_matmul.mlir
5 // RUN: --sparse-gpu-codegen | FileCheck %s
12 // CHECK-LABEL: gpu.module @sparse_kernels
13 // CHECK-LABEL: gpu.func @kernel0(
23 // CHECK: %[[VAL_9:.*]] = gpu.block_id x
24 // CHECK: %[[VAL_10:.*]] = gpu.block_dim x
25 // CHECK: %[[VAL_11:.*]] = gpu.thread_id x
26 // CHECK: %[[VAL_12:.*]] = gpu.grid_dim x
46 // CHECK: gpu.return
51 // CHECK: gpu.wait async
52 // CHECK: gpu.alloc async
[all …]
gpu_matvec_lib.mlir
1 // RUN: mlir-opt %s --linalg-generalize-named-ops --sparse-gpu-codegen="num-threads=0" | FileCheck %s
21 // CHECK: %[[VAL_11:.*]] = gpu.wait async
23 // CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = gpu.alloc async {{\[}}%[[VAL_11]]] (%[[VAL_12]]) : memref<?xindex>
24 // CHECK: %[[VAL_15:.*]] = gpu.memcpy async {{\[}}%[[VAL_14]]] %[[VAL_13]], %[[VAL_8]] : memref<?xindex>, memref<?xindex, strided<[?], offset: ?>>
25 // CHECK: %[[VAL_16:.*]] = gpu.wait async
27 // CHECK: %[[VAL_18:.*]], %[[VAL_19:.*]] = gpu.alloc async {{\[}}%[[VAL_16]]] (%[[VAL_17]]) : memref<?xindex>
28 // CHECK: %[[VAL_20:.*]] = gpu.memcpy async {{\[}}%[[VAL_19]]] %[[VAL_18]], %[[VAL_9]] : memref<?xindex>, memref<?xindex, strided<[?], offset: ?>>
29 // CHECK: %[[VAL_21:.*]] = gpu.wait async
31 // CHECK: %[[VAL_23:.*]], %[[VAL_24:.*]] = gpu.alloc async {{\[}}%[[VAL_21]]] (%[[VAL_22]]) : memref<?xf64>
32 // CHECK: %[[VAL_25:.*]] = gpu
[all...]
/llvm-project/libc/test/integration/src/__support/GPU/
scan_reduce.cpp
21 uint64_t mask = gpu::get_lane_mask(); in test_reduce()
22 uint32_t x = gpu::reduce(mask, 1); in test_reduce()
23 EXPECT_EQ(x, gpu::get_lane_size()); in test_reduce()
25 uint32_t y = gpu::reduce(mask, gpu::get_lane_id()); in test_reduce()
26 EXPECT_EQ(y, sum(gpu::get_lane_size() - 1)); in test_reduce()
29 if (gpu::get_lane_id() % 2) in test_reduce()
30 z = gpu::reduce(gpu::get_lane_mask(), 1); in test_reduce()
31 gpu::sync_lane(mask); in test_reduce()
33 EXPECT_EQ(z, gpu::get_lane_id() % 2 ? gpu::get_lane_size() / 2 : 0); in test_reduce()
40 uint64_t mask = gpu::get_lane_mask(); in test_scan()
[all …]
/llvm-project/mlir/test/Conversion/GPUCommon/
lower-memory-space-attrs.mlir
1 // RUN: mlir-opt %s -split-input-file -convert-gpu-to-rocdl | FileCheck %s --check-prefixes=CHECK,R…
2 // RUN: mlir-opt %s -split-input-file -convert-gpu-to-nvvm | FileCheck %s --check-prefixes=CHECK,NV…
4 gpu.module @kernel {
5 gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, #gpu.address_space<private>>) {
7 memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<private>>
8 gpu.return
20 gpu.module @kernel {
21 gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, #gpu.address_space<workgroup>>) {
23 memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<workgroup>>
24 gpu.return
[all …]
lower-sparse-to-gpu-runtime-calls.mlir
1 // RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
3 module attributes {gpu.container_module} {
18 %token0 = gpu.wait async
19 %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
20 %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
21 …%spmat, %token4 = gpu.create_coo async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref…
22 %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0 : index into memref<?xf64>
23 %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %spmat, %dnvec, %dnvec into f64
24 %token7 = gpu.spmv async [%token6] %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
25 %token8 = gpu.destroy_sp_mat async [%token7] %spmat
[all …]
/llvm-project/mlir/lib/Conversion/GPUToNVVM/
LowerGpuOpsToNVVMOps.cpp
53 /// Convert gpu dialect shfl mode enum to the equivalent nvvm one. in convertShflKind()
54 static NVVM::ShflKind convertShflKind(gpu::ShuffleMode mode) { in convertShflKind()
56 case gpu::ShuffleMode::XOR: in convertShflKind()
58 case gpu::ShuffleMode::UP: in convertShflKind()
60 case gpu::ShuffleMode::DOWN: in convertShflKind()
62 case gpu::ShuffleMode::IDX: in convertShflKind()
69 convertReduxKind(gpu::AllReduceOperation mode) { in convertReduxKind()
71 case gpu::AllReduceOperation::ADD: in convertReduxKind()
73 case gpu::AllReduceOperation::MUL: in convertReduxKind()
75 case gpu in convertReduxKind()
[all...]
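
(For context: a sketch, assumed, of the MLIR-level op this conversion handles. The mode keyword on gpu.shuffle, one of xor/up/down/idx, is the gpu::ShuffleMode value that convertShflKind maps onto the corresponding NVVM shuffle kind.)

func.func @shuffle_xor(%val : f32, %offset : i32, %width : i32) -> f32 {
  %shfl, %valid = gpu.shuffle xor %val, %offset, %width : f32
  return %shfl : f32
}
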
/llvm-project/mlir/test/Conversion/VectorToGPU/
vector-to-mma-ops.mlir
1 // RUN: mlir-opt %s -pass-pipeline="builtin.module(func.func(convert-vector-to-gpu),canonicalize)" --split-input-file | FileCheck %s
11 // CHECK-DAG: %[[A:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "AOp">
12 // CHECK-DAG: %[[B:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "BOp">
13 // CHECK-DAG: %[[C:.+]] = gpu.subgroup_mma_load_matrix %{{.*}}[%{{.*}}, %{{.*}}] {leadDimension = 16 : index} : memref<16x16xf16> -> !gpu.mma_matrix<16x16xf16, "COp">
14 // CHECK: %[[D:.+]] = gpu.subgroup_mma_compute %[[A]], %[[B]], %[[C]] : !gpu.mma_matrix<16x16xf16, "AOp">, !gpu
[all...]
/llvm-project/mlir/lib/Conversion/GPUCommon/
GPUOpsLowering.h
22 LLVM::LLVMFuncOp getOrDefineFunction(gpu::GPUModuleOp moduleOp, Location loc,
29 gpu::GPUModuleOp moduleOp, Type llvmI8,
38 /// Lowering for gpu.dynamic.shared.memory to LLVM dialect. The pattern first
42 : public ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp> {
44 gpu::DynamicSharedMemoryOp>::ConvertOpToLLVMPattern;
47 : ConvertOpToLLVMPattern<gpu::DynamicSharedMemoryOp>(converter),
51 matchAndRewrite(gpu::DynamicSharedMemoryOp op, OpAdaptor adaptor,
65 /// The attribute name to use instead of `gpu.kernel`. Null if no attribute
82 struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
85 : ConvertOpToLLVMPattern<gpu
[all...]
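
(For context: a sketch, assumed, of the op that the DynamicSharedMemoryOp pattern above lowers; the op produces a dynamically sized i8 memref in workgroup address space.)

gpu.module @kernels {
  gpu.func @uses_dynamic_smem() kernel {
    %smem = gpu.dynamic_shared_memory : memref<?xi8, #gpu.address_space<workgroup>>
    gpu.return
  }
}
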
/llvm-project/mlir/test/Dialect/XeGPU/
XeGPUOps.mlir
7 // CHECK-LABEL: gpu.module @test {
8 gpu.module @test {
9 // CHECK: gpu.func @test_create_nd_tdesc_vc_1(%[[arg0:.*]]: memref<24x32xf32>) {
10 gpu.func @test_create_nd_tdesc_vc_1(%src: memref<24x32xf32>) {
13 gpu.return
16 // CHECK: gpu.func @test_create_nd_tdesc_with_sg_map(%[[arg0:.*]]: memref<24x32xf32>) {
17 gpu.func @test_create_nd_tdesc_with_sg_map(%src: memref<24x32xf32>) {
21 gpu.return
24 // CHECK: gpu.func @test_create_nd_tdesc_vc_2(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) {
25 gpu
[all...]
/llvm-project/mlir/test/Conversion/GPUToNVVM/
gpu-to-nvvm-32b.mlir
1 // RUN: mlir-opt %s -convert-gpu-to-nvvm='index-bitwidth=32' -split-input-file | FileCheck %s
5 gpu.module @test_module_0 {
11 %tIdX = gpu.thread_id x
12 %tIdY = gpu.thread_id y
13 %tIdZ = gpu.thread_id z
15 %bDimX = gpu.block_dim x
16 %bDimY = gpu.block_dim y
17 %bDimZ = gpu.block_dim z
19 %bIdX = gpu.block_id x
20 %bIdY = gpu
[all...]
/llvm-project/mlir/test/Integration/GPU/SYCL/
gpu-reluf32-to-spirv.mlir
1 // RUN: mlir-opt %s -pass-pipeline='builtin.module(spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \
8 module @relu attributes {gpu.container_module} {
40 %memref = gpu.alloc host_shared () : memref<4x5xf32>
42 %memref_0 = gpu.alloc host_shared () : memref<4x5xi1>
43 %2 = gpu.wait async
44 %3 = gpu.launch_func async [%2] @test_kernel::@test_kernel blocks in (%c4, %c5, %c1) threads in (%c1, %c1, %c1) args(%memref : memref<4x5xf32>, %cst : f32, %memref_0 : memref<4x5xi1>)
45 gpu
[all...]
