// RUN: mlir-opt %s \
// RUN: | mlir-opt -gpu-lower-to-nvvm-pipeline="cubin-format=%gpu_compilation_format" \
// RUN: | mlir-runner \
// RUN: --shared-libs=%mlir_cuda_runtime \
// RUN: --shared-libs=%mlir_runner_utils \
// RUN: --shared-libs=%mlir_c_runner_utils \
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s

// Exercises a `gpu.alloc host_shared` buffer that is touched from both the
// host function and the body of a `gpu.launch`:
//   1. the host stores 1000 into the single element of the buffer,
//   2. a kernel launched with a 1x1x1 grid of 1x1x1 blocks loads that
//      element, adds it to itself and stores the sum back,
//   3. the host loads the element again and prints it.
// The printed value is therefore expected to be 2 * 1000.

// CHECK: 2000
module attributes {gpu.container_module} {
  func.func @main() {
    // %c1 supplies every grid/block dimension of the launch below.
    %c1 = arith.constant 1 : index
    // %c0 is the only in-bounds index of a memref<1xi32>; all loads and
    // stores must use it (indexing with %c1 would be out of bounds).
    %c0 = arith.constant 0 : index
    %c1000_i32 = arith.constant 1000 : i32

    // One-element i32 buffer allocated with the host_shared attribute.
    %memref = gpu.alloc host_shared () : memref<1xi32>

    // Host-side initialization of the element.
    memref.store %c1000_i32, %memref[%c0] : memref<1xi32>

    // Single block, single thread: double the element in place.
    gpu.launch blocks(%arg0, %arg1, %arg2) in (%arg6 = %c1, %arg7 = %c1, %arg8 = %c1)
               threads(%arg3, %arg4, %arg5) in (%arg9 = %c1, %arg10 = %c1, %arg11 = %c1) {
      %val = memref.load %memref[%c0] : memref<1xi32>
      %doubled = arith.addi %val, %val : i32
      memref.store %doubled, %memref[%c0] : memref<1xi32>
      gpu.terminator
    }

    // Host-side read-back of the value written inside the launch region.
    %result = memref.load %memref[%c0] : memref<1xi32>
    vector.print %result : i32
    return
  }
}