xref: /llvm-project/mlir/benchmark/python/common.py (revision fa90c9d5e7a310ea87b7032c39c0ca657c794abc)
1"""Common utilities that are useful for all the benchmarks."""
2import numpy as np
3
4import mlir.all_passes_registration
5
6from mlir import ir
7from mlir.dialects import arith
8from mlir.dialects import builtin
9from mlir.dialects import memref
10from mlir.dialects import scf
11from mlir.dialects import std
12from mlir.passmanager import PassManager
13
14
def setup_passes(mlir_module):
    """Parse the benchmark lowering pipeline and run it on `mlir_module`.

    The pipeline is written as a textual pass-pipeline description: a
    comma-separated sequence of passes, some of them nested under
    `builtin.func(...)` and some carrying `{...}` options. It is handed to
    `PassManager.parse` and the resulting pass manager is run on the module.
    Nothing is returned.
    """

    def on_funcs(*passes):
        # Nest the given passes under a `builtin.func(...)` anchor.
        return "builtin.func(" + ",".join(passes) + ")"

    sparsification_options = " ".join([
        "parallelization-strategy=0",
        "vectorization-strategy=0",
        "vl=1",
        "enable-simd-index32=False",
    ])
    vector_to_llvm_options = " ".join([
        "reassociate-fp-reductions=1",
        "enable-index-optimizations=1",
    ])

    # Order matters: the stages are run in the sequence listed here.
    stages = [
        on_funcs("linalg-generalize-named-ops", "linalg-fuse-elementwise-ops"),
        "sparsification{" + sparsification_options + "}",
        "sparse-tensor-conversion",
        on_funcs(
            "linalg-bufferize",
            "convert-linalg-to-loops",
            "convert-vector-to-scf",
        ),
        "convert-scf-to-std",
        "func-bufferize",
        "tensor-constant-bufferize",
        on_funcs("tensor-bufferize", "std-bufferize", "finalizing-bufferize"),
        "convert-vector-to-llvm{" + vector_to_llvm_options + "}",
        "lower-affine",
        "convert-memref-to-llvm",
        "convert-std-to-llvm",
        "reconcile-unrealized-casts",
    ]
    pass_manager = PassManager.parse(",".join(stages))
    pass_manager.run(mlir_module)
41
42
def create_sparse_np_tensor(dimensions, number_of_elements):
    """Return a float64 numpy tensor with exactly `number_of_elements` nonzeros.

    The tensor has shape `dimensions` and is zero everywhere except at
    `number_of_elements` distinct, randomly chosen positions, each of which
    holds a value drawn uniformly from [1, 100).

    Positions are sampled without replacement so that two draws can never
    land on the same cell; drawing indices independently would occasionally
    collide and yield fewer nonzeros than requested.

    Args:
        dimensions: Shape of the tensor to create (anything `np.zeros`
            accepts as a shape).
        number_of_elements: Number of nonzero entries to place.

    Returns:
        A `np.float64` array of shape `dimensions`.

    Raises:
        ValueError: If `number_of_elements` is negative or exceeds the total
            number of cells in the tensor.
    """
    tensor = np.zeros(dimensions, np.float64)
    if not 0 <= number_of_elements <= tensor.size:
        raise ValueError(
            f"number_of_elements must be between 0 and {tensor.size} for "
            f"dimensions {dimensions}, got {number_of_elements}"
        )
    # A prefix of a random permutation of all flat offsets is a uniform
    # sample of distinct cells.
    flat_positions = np.random.permutation(tensor.size)[:number_of_elements]
    # The lower bound of 1 guarantees that every sampled value is nonzero.
    tensor.flat[flat_positions] = np.random.uniform(
        1, 100, size=number_of_elements)
    return tensor
59
60
def get_kernel_func_from_module(module: ir.Module) -> builtin.FuncOp:
    """Return the single operation contained in `module`.

    The module is expected to hold exactly one region, which holds exactly
    one block, which holds exactly one operation; that operation is returned.
    An assertion fails if any of the three levels has a different count.
    """
    regions = module.operation.regions
    assert len(regions) == 1, \
        "Expected kernel module to have only one region"

    blocks = regions[0].blocks
    assert len(blocks) == 1, \
        "Expected kernel module to have only one block"

    operations = blocks[0].operations
    assert len(operations) == 1, \
        "Expected kernel module to have only one operation"

    return operations[0]
73
74
def emit_timer_func() -> builtin.FuncOp:
    """Create the private `nano_time` function declaration.

    The declared function takes no arguments and returns a single signless
    64-bit integer; it is tagged with the `llvm.emit_c_interface` attribute.
    Code that uses `nano_time` must have the `MLIR_RUNNER_UTILS` and
    `MLIR_C_RUNNER_UTILS` libraries included.
    """
    argument_types = []
    result_types = [ir.IntegerType.get_signless(64)]
    timer_decl = builtin.FuncOp(
        "nano_time",
        (argument_types, result_types),
        visibility="private",
    )
    timer_decl.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    return timer_decl
84
85
def emit_benchmark_wrapped_main_func(
        func: builtin.FuncOp, timer_func: builtin.FuncOp) -> builtin.FuncOp:
    """Builds a public `main` function that repeatedly calls and times `func`.

    `main` takes the same arguments as `func` plus one trailing argument: a
    one-dimensional memref of i64 used as the timing buffer. It has the same
    result types as `func`.

    The body of `main` is a single `scf.for` loop running from 0 up to the
    size of dimension 0 of the timing buffer, in steps of 1 -- i.e. the
    number of timed iterations is chosen by the caller through the length of
    the buffer it passes in. Each iteration calls `timer_func`, calls `func`,
    calls `timer_func` again, and stores the difference of the two timer
    readings into the timing buffer at the current iteration index.

    The `len(func.type.results)` arguments that immediately precede the
    timing buffer are used as the loop's initial iteration arguments; inside
    the loop they are replaced by the loop-carried values, and the results of
    each call to `func` are yielded as the values for the next iteration.
    `main` returns the loop.

    Args:
        func: The function to be timed.
        timer_func: A function taking no arguments whose result is a time
            reading (see `emit_timer_func`).

    Returns:
        The newly created `main` function.
    """
    i64_type = ir.IntegerType.get_signless(64)
    # Shape `[-1]`: one dimension whose extent is not fixed here (presumably
    # the marker for a dynamic size -- confirm against the bindings in use).
    memref_of_i64_type = ir.MemRefType.get([-1], i64_type)
    wrapped_func = builtin.FuncOp(
        # Same signature as `func`, plus an extra trailing i64 buffer in
        # which the timings are saved.
        "main",
        (func.arguments.types + [memref_of_i64_type], func.type.results),
        visibility="public"
    )
    wrapped_func.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()

    num_results = len(func.type.results)
    with ir.InsertionPoint(wrapped_func.add_entry_block()):
        # The timing buffer is always the last argument of `main`.
        timer_buffer = wrapped_func.arguments[-1]
        zero = arith.ConstantOp.create_index(0)
        # Loop trip count: the extent of dimension 0 of the timing buffer.
        n_iterations = memref.DimOp(ir.IndexType.get(), timer_buffer, zero)
        one = arith.ConstantOp.create_index(1)
        # The `num_results` arguments just before the timing buffer seed the
        # loop-carried values.
        iter_args = list(wrapped_func.arguments[-num_results - 1:-1])
        loop = scf.ForOp(zero, n_iterations, one, iter_args)
        with ir.InsertionPoint(loop.body):
            start = std.CallOp(timer_func, [])
            call = std.CallOp(
                func,
                # Leading arguments are passed through unchanged; the
                # remaining ones come from the loop-carried values.
                wrapped_func.arguments[:-num_results - 1] + loop.inner_iter_args
            )
            end = std.CallOp(timer_func, [])
            time_taken = arith.SubIOp(end, start)
            # One timing per iteration, indexed by the induction variable.
            memref.StoreOp(time_taken, timer_buffer, [loop.induction_variable])
            # Feed this call's results into the next iteration.
            scf.YieldOp(list(call.results))
        std.ReturnOp(loop)

    return wrapped_func
125