# RUN: env SUPPORT_LIB=%mlir_cuda_runtime \
# RUN:   %PYTHON %s | FileCheck %s

# ===----------------------------------------------------------------------===//
#  Chapter 0 : Hello World
# ===----------------------------------------------------------------------===//
#
# This program demonstrates Hello World:
#   1. Build MLIR function with arguments
#   2. Build MLIR GPU kernel
#   3. Print from a GPU thread
#   4. Pass arguments, JIT compile and run the MLIR function
#
# ===----------------------------------------------------------------------===//


from mlir.dialects import gpu
from tools.nvdsl import *


# Step 1: `mlir_func` turns this Python function into an MLIR func.func; the
# Python body is what ends up inside that func. The `alpha` argument is
# translated to an `index` value by the decorator.
@NVDSL.mlir_func
def main(alpha):
    # Step 2: `mlir_gpu_launch` turns the nested function into a gpu.launch
    # region (one block of four threads along x). Because the kernel is only
    # outlined later, host-side values such as `alpha` can still be
    # constant-folded into the device code.
    @NVDSL.mlir_gpu_launch(grid=(1, 1, 1), block=(4, 1, 1))
    def kernel():
        thread_x = gpu.thread_id(gpu.Dimension.x)
        # The `+` operator on MLIR values generates an arith.addi.
        shifted = alpha + thread_x
        # Step 3: every GPU thread prints its own index and computed value.
        gpu.printf("GPU thread %llu has %llu\n", [thread_x, shifted])

    # Emit the launch of the kernel defined above.
    kernel()


# Step 4: calling the decorated function JIT-compiles the generated IR and
# executes it with the argument given here.
alpha = 100
main(alpha)

# CHECK: GPU thread 0 has 100
# CHECK: GPU thread 1 has 101
# CHECK: GPU thread 2 has 102
# CHECK: GPU thread 3 has 103