"""Common utilities that are useful for all the benchmarks."""
import numpy as np

import mlir.all_passes_registration

from mlir import ir
from mlir.dialects import arith
from mlir.dialects import builtin
from mlir.dialects import memref
from mlir.dialects import scf
from mlir.dialects import std
from mlir.passmanager import PassManager


def setup_passes(mlir_module):
    """Parse the benchmark pass pipeline and run it on `mlir_module`.

    The pipeline is a fixed textual description assembled below. The
    sparsification pass receives its options through `opt`
    (parallelization and vectorization strategies set to 0, `vl=1`,
    `enable-simd-index32=False`). The remaining passes are listed in the
    order in which they appear in the pipeline string.

    Args:
        mlir_module: The object handed to `PassManager.run`.

    Returns:
        None.
    """
    opt = (
        "parallelization-strategy=0"
        " vectorization-strategy=0 vl=1 enable-simd-index32=False"
    )
    # Doubled braces inside the f-strings produce literal `{` / `}` in the
    # final pipeline text; only `{opt}` is interpolated.
    pipeline = (
        f"builtin.func"
        f"(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),"
        f"sparsification{{{opt}}},"
        f"sparse-tensor-conversion,"
        f"builtin.func"
        f"(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf),"
        f"convert-scf-to-std,"
        f"func-bufferize,"
        f"tensor-constant-bufferize,"
        f"builtin.func(tensor-bufferize,std-bufferize,finalizing-bufferize),"
        f"convert-vector-to-llvm"
        f"{{reassociate-fp-reductions=1 enable-index-optimizations=1}},"
        f"lower-affine,"
        f"convert-memref-to-llvm,"
        f"convert-std-to-llvm,"
        f"reconcile-unrealized-casts"
    )
    PassManager.parse(pipeline).run(mlir_module)


def create_sparse_np_tensor(dimensions, number_of_elements):
    """Construct a float64 numpy tensor with an exact number of nonzeros.

    The tensor has shape `dimensions` and is zero everywhere except at
    `number_of_elements` distinct, randomly chosen positions. Each of those
    positions holds a value drawn from `np.random.uniform(1, 100)`.

    Positions are sampled without replacement, so the same cell is never
    picked twice and the number of nonzero entries always equals
    `number_of_elements`.

    Args:
        dimensions: Shape of the tensor, as accepted by `np.zeros`.
        number_of_elements: Number of nonzero entries to place.

    Returns:
        The populated `numpy.ndarray` of dtype `float64`.

    Raises:
        ValueError: If `number_of_elements` is negative or larger than the
            total number of cells in the tensor.
    """
    tensor = np.zeros(dimensions, np.float64)
    if not 0 <= number_of_elements <= tensor.size:
        raise ValueError(
            f"number_of_elements must be between 0 and {tensor.size} "
            f"for dimensions {dimensions}, got {number_of_elements}"
        )
    # Sample distinct positions in the flattened tensor; independent
    # per-axis draws could repeat a coordinate and under-fill the tensor.
    flat_positions = np.random.choice(
        tensor.size, size=number_of_elements, replace=False)
    values = np.random.uniform(1, 100, size=number_of_elements)
    # `np.put` indexes the flattened target and writes in place.
    np.put(tensor, flat_positions, values)
    return tensor


def get_kernel_func_from_module(module: ir.Module) -> builtin.FuncOp:
    """Return the single operation contained in `module`.

    The module's operation must have exactly one region, that region exactly
    one block, and that block exactly one operation; that operation is
    returned.

    Args:
        module: The mlir module to extract the function from.

    Returns:
        The only operation of the module's only block.

    Raises:
        AssertionError: If the module does not have exactly one region, one
            block, and one operation.
    """
    regions = module.operation.regions
    assert len(regions) == 1, \
        "Expected kernel module to have only one region"
    assert len(regions[0].blocks) == 1, \
        "Expected kernel module to have only one block"
    assert len(regions[0].blocks[0].operations) == 1, \
        "Expected kernel module to have only one operation"
    return regions[0].blocks[0].operations[0]


def emit_timer_func() -> builtin.FuncOp:
    """Return the declaration of the `nano_time` function.

    The declaration is a private function that takes no arguments and returns
    one signless 64-bit integer, tagged with the `llvm.emit_c_interface`
    attribute. If the `nano_time` function is used, the `MLIR_RUNNER_UTILS`
    and `MLIR_C_RUNNER_UTILS` must be included.
    """
    i64_type = ir.IntegerType.get_signless(64)
    nano_time = builtin.FuncOp(
        "nano_time", ([], [i64_type]), visibility="private")
    nano_time.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()
    return nano_time


def emit_benchmark_wrapped_main_func(func, timer_func):
    """Build a public `main` function that repeatedly times calls to `func`.

    The new function takes the arguments of `func` plus one trailing argument,
    a 1-D memref of i64 used as the timing buffer, and returns the same result
    types as `func`. Its body is a loop in which each iteration calls
    `timer_func`, then `func`, then `timer_func` again, and stores the
    difference of the two timer values into the timing buffer at the loop's
    induction variable.

    The loop runs from 0 up to the size of dimension 0 of the timing buffer,
    in steps of 1. The number of timed iterations is therefore chosen by the
    caller through the length of the buffer passed in.

    The last `len(func.type.results)` arguments before the timing buffer are
    used as the initial loop-carried values. In each iteration they are passed
    to `func` after the remaining leading arguments, and the results of that
    call are yielded as the loop-carried values for the next iteration. The
    loop's final results are returned from `main`.

    Args:
        func: The function to benchmark, as a FuncOp.
        timer_func: The timer function to call before and after `func`, as a
            FuncOp (see `emit_timer_func`).

    Returns:
        The newly created wrapping FuncOp named `main`.
    """
    i64_type = ir.IntegerType.get_signless(64)
    memref_of_i64_type = ir.MemRefType.get([-1], i64_type)
    wrapped_func = builtin.FuncOp(
        # Same signature as `func`, plus an extra trailing i64 buffer in
        # which the timings are saved.
        "main",
        (func.arguments.types + [memref_of_i64_type], func.type.results),
        visibility="public"
    )
    wrapped_func.attributes["llvm.emit_c_interface"] = ir.UnitAttr.get()

    num_results = len(func.type.results)
    with ir.InsertionPoint(wrapped_func.add_entry_block()):
        timer_buffer = wrapped_func.arguments[-1]
        zero = arith.ConstantOp.create_index(0)
        # Trip count of the loop: size of dimension 0 of the timing buffer.
        n_iterations = memref.DimOp(ir.IndexType.get(), timer_buffer, zero)
        one = arith.ConstantOp.create_index(1)
        # The `num_results` arguments right before the timing buffer seed
        # the loop-carried values.
        iter_args = list(wrapped_func.arguments[-num_results - 1:-1])
        loop = scf.ForOp(zero, n_iterations, one, iter_args)
        with ir.InsertionPoint(loop.body):
            start = std.CallOp(timer_func, [])
            # Leading arguments are forwarded unchanged; the trailing ones
            # come from the loop-carried values of the current iteration.
            call = std.CallOp(
                func,
                wrapped_func.arguments[:-num_results - 1] + loop.inner_iter_args
            )
            end = std.CallOp(timer_func, [])
            time_taken = arith.SubIOp(end, start)
            memref.StoreOp(time_taken, timer_buffer, [loop.induction_variable])
            scf.YieldOp(list(call.results))
        std.ReturnOp(loop)

    return wrapped_func