// RUN: mlir-opt %s \
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
// RUN: | mlir-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils \
// RUN: --shared-libs=%mlir_c_runner_utils \
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s

// Stores 1000 into a host-shared buffer, doubles it on the device
// (1x1x1 grid, 1x1x1 block), then prints the result on the host.
// CHECK: 2000
module attributes {gpu.container_module} {
  func.func @main() {
    %c1 = arith.constant 1 : index
    %c0 = arith.constant 0 : index
    %c1000_i32 = arith.constant 1000 : i32
    // host_shared: the allocation is accessible from both host and device,
    // so no explicit memcpy is needed around the kernel launch.
    %memref = gpu.alloc host_shared () : memref<1xi32>
    // Index with %c0: a memref<1xi32> has exactly one element at index 0.
    // (The original indexed with %c1, which is out of bounds.)
    memref.store %c1000_i32, %memref[%c0] : memref<1xi32>
    gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1) threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
      // Single-thread kernel: read the value, double it, write it back.
      %1 = memref.load %memref[%c0] : memref<1xi32>
      %2 = arith.addi %1, %1 : i32
      memref.store %2, %memref[%c0] : memref<1xi32>
      gpu.terminator
    }
    // Read back on the host through the same shared allocation.
    %0 = memref.load %memref[%c0] : memref<1xi32>
    vector.print %0 : i32
    return
  }
}