# RUN: %PYTHON %s | FileCheck %s
# This is just a smoke test that the dialect is functional.

from mlir.ir import *
from mlir.dialects import nvgpu, arith, memref


# Helper decorator: creates a fresh module, runs `f` with the insertion point
# set to the module body, and prints the module so FileCheck can verify it.
def constructAndPrintInModule(f):
    print("\nTEST:", f.__name__)
    with Context(), Location.unknown():
        module = Module.create()
        with InsertionPoint(module.body):
            f()
        print(module)
    return f


# CHECK-LABEL: testTypes
@constructAndPrintInModule
def testTypes():
    tensorMemrefType = MemRefType.get(
        (128, 64), F16Type.get(), memory_space=Attribute.parse("3")
    )
    # CHECK: !nvgpu.tensormap.descriptor<tensor = memref<128x64xf16, 3>, swizzle = swizzle_128b, l2promo = l2promo_256b, oob = nan, interleave = none>
    tma_desc = nvgpu.TensorMapDescriptorType.get(
        tensorMemrefType,
        nvgpu.TensorMapSwizzleKind.SWIZZLE_128B,
        nvgpu.TensorMapL2PromoKind.L2PROMO_256B,
        nvgpu.TensorMapOOBKind.OOB_NAN,
        nvgpu.TensorMapInterleaveKind.INTERLEAVE_NONE,
    )
    print(tma_desc)


# CHECK-LABEL: testSmoke
@constructAndPrintInModule
def testSmoke():
    cst = arith.ConstantOp(value=42, result=IndexType.get())
    # Allocate a 10x10 f32 buffer in memory space 3 (GPU shared memory) and
    # load a 4x1 vector from it with nvgpu.ldmatrix.
    mem_t = MemRefType.get((10, 10), F32Type.get(), memory_space=Attribute.parse("3"))
    vec_t = VectorType.get((4, 1), F32Type.get())
    mem = memref.AllocOp(mem_t, [], [])
    # CHECK: %0 = nvgpu.ldmatrix %alloc[%c42, %c42] {numTiles = 4 : i32, transpose = false} : memref<10x10xf32, 3> -> vector<4x1xf32>
    nvgpu.LdMatrixOp(vec_t, mem, [cst, cst], False, 4)
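

# A minimal additional sketch (not FileCheck-verified): it exercises the same
# nvgpu.TensorMapDescriptorType.get API as testTypes above with a different
# combination of enum cases. The cases SWIZZLE_NONE, L2PROMO_NONE, and
# OOB_ZERO are assumed to follow the same naming scheme as the cases used
# above; treat this as an illustrative sketch, not a checked test.
@constructAndPrintInModule
def testTypesDefaultsSketch():
    memrefType = MemRefType.get(
        (64, 32), F32Type.get(), memory_space=Attribute.parse("3")
    )
    # Descriptor with no swizzling, no L2 promotion, and zero-fill out-of-bounds.
    desc = nvgpu.TensorMapDescriptorType.get(
        memrefType,
        nvgpu.TensorMapSwizzleKind.SWIZZLE_NONE,  # assumed enum case name
        nvgpu.TensorMapL2PromoKind.L2PROMO_NONE,  # assumed enum case name
        nvgpu.TensorMapOOBKind.OOB_ZERO,  # assumed enum case name
        nvgpu.TensorMapInterleaveKind.INTERLEAVE_NONE,
    )
    print(desc)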