/llvm-project/llvm/lib/Target/X86/ |
H A D | X86InstrAMX.td | 31 def TILELOADD#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), 36 def TILELOADDT1#Suffix : I<0x4b, MRMSrcMemFSIB, (outs TILE:$dst), 42 (ins sibmem:$dst, TILE:$src), 56 def TILEZERO : I<0x49, MRMr0, (outs TILE:$dst), (ins), 65 def PTILELOADDV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, 69 def PTILELOADDT1V : PseudoI<(outs TILE:$dst), (ins GR16:$src1, 75 TILE:$src4), []>; 78 def PTILEZEROV : PseudoI<(outs TILE:$dst), (ins GR16:$src1, GR16:$src2), 79 [(set TILE:$dst, (int_x86_tilezero_internal 83 // Pseudo instructions, using immediates instead of tile register [all...] |
H A D | X86FastTileConfig.cpp | 1 //===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===// 12 /// know the shape of each physical tile registers, because the register 14 /// pass. It collects the shape information of each physical tile register 16 /// to tile config register. 53 return "Fast Tile Register Configure"; 77 "Fast Tile Register Configure", false, false) 79 "Fast Tile Register Configure", false, false) 84 // The instruction must have 3 operands: tile def, row, col. in isTileDef() 118 // PreTileConfig should configure the tile registers based on basic in configBasicBlock() 127 // AMX instructions that define tile registe in configBasicBlock() [all...] |
/llvm-project/clang/lib/Headers/ |
H A D | amxintrin.h | 20 __attribute__((__always_inline__, __nodebug__, __target__("amx-tile"))) 26 /// Load tile configuration from a 64-byte memory location specified by 27 /// "mem_addr". The tile configuration includes the tile type palette, the 29 /// palette_id is zero, that signifies the init state for both the tile 30 /// config and the tile data, and the tiles are zeroed. Any invalid 44 /// Stores the current tile configuration to a 64-byte memory location in _tile_loadconfig() 45 /// specified by "mem_addr". The tile configuration includes the tile type 60 /// Release the tile configuratio in _tile_storeconfig() 135 _tile_zero(tile) global() argument 283 _tile_stored_internal(unsigned short m,unsigned short n,void * base,__SIZE_TYPE__ stride,_tile1024i tile) _tile_stored_internal() argument 309 _tile1024i tile; global() member [all...] |
H A D | amxcomplexintrin.h | 22 /// accumulate the results into a packed single precision tile. Each dword 58 /// The destination tile. Max size is 1024 Bytes. 60 /// The 1st source tile. Max size is 1024 Bytes. 62 /// The 2nd source tile. Max size is 1024 Bytes. 66 /// accumulate the results into a packed single precision tile. Each dword 103 /// The destination tile. Max size is 1024 Bytes. 105 /// The 1st source tile. Max size is 1024 Bytes. 107 /// The 2nd source tile. Max size is 1024 Bytes. 123 /// accumulate the results into a packed single precision tile. Each dword 133 /// The destination tile. Max size is 1024 Bytes. [all …]
|
/llvm-project/clang/test/OpenMP/ |
H A D | tile_messages.cpp | 6 #pragma omp tile sizes in func() 11 #pragma omp tile sizes( in func() 15 #pragma omp tile sizes() in func() 19 #pragma omp tile sizes(5 in func() 24 #pragma omp tile sizes(5, in func() 28 #pragma omp tile sizes(5,) in func() 33 #pragma omp tile sizes(5+ in func() 37 #pragma omp tile sizes(5+) in func() 41 #pragma omp tile sizes(for) in func() 45 #pragma omp tile sizes(0) in func() [all …]
|
H A D | tile_ast_print.cpp | 24 // PRINT: #pragma omp tile sizes(5, 5) in foo1() 29 #pragma omp tile sizes(5,5) in foo1() 45 // PRINT: #pragma omp tile sizes(5, 5) in foo2() 50 #pragma omp tile sizes(5,5) in foo2() 71 // PRINT: #pragma omp tile sizes(5) in foo3() 75 #pragma omp tile sizes(5) in foo3() 95 // PRINT: #pragma omp tile sizes(5, 5) in foo4() 100 #pragma omp tile sizes(5, 5) in foo4() 128 // PRINT: #pragma omp tile sizes(5) in foo5() 132 #pragma omp tile sizes(5) in foo5() [all …]
|
/llvm-project/mlir/test/Conversion/ArmSMEToLLVM/ |
H A D | tile-spills-and-fills.mlir | 1 // RUN: mlir-opt %s -test-arm-sme-tile-allocation -split-input-file | \ 2 // RUN: FileCheck %s --check-prefix=AFTER-TILE-ALLOC 7 /// Checks tile spill/reloads are inserted around in-memory tiles (i.e. tiles 8 /// that were not assigned a physical SME tile). 15 /// During tile allocation if there's not a physical tile ID available an op 16 /// will be assigned an in-memory tile ID (which is a tile ID >= 16). 36 /// // Swap contents of %tileAlloca and tile 0 41 /// // Execute the op using tile [all...] |
/llvm-project/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/ |
H A D | outerproduct-f64.mlir | 34 %tile = vector.outerproduct %vector, %vector : vector<[2]xf64>, vector<[2]xf64> 36 // Print the tile. The smallest SVL is 128-bits so the tile will be at least 39 // CHECK: TILE BEGIN 42 // CHECK: TILE END 43 vector.print str "TILE BEGIN\n" 44 vector.print %tile : vector<[2]x[2]xf64> 45 vector.print str "TILE END\n" 60 %tile = vector.outerproduct %vector, %vector, %acc : vector<[2]xf64>, vector<[2]xf64> 62 // Print the tile [all...] |
H A D | outerproduct-f32.mlir | 27 %tile = vector.outerproduct %vector, %vector : vector<[4]xf32>, vector<[4]xf32> 29 // Print the tile. The smallest SVL is 128-bits so the tile will be at least 32 // WITHOUT-ACC: TILE BEGIN 37 // WITHOUT-ACC: TILE END 38 vector.print str "TILE BEGIN\n" 39 vector.print %tile : vector<[4]x[4]xf32> 40 vector.print str "TILE END\n" 52 %tile = vector.outerproduct %vector, %vector, %acc : vector<[4]xf32>, vector<[4]xf32> 54 // Print the tile [all...] |
H A D | transpose.mlir | 15 // Calculate the size of a 32-bit tile, e.g. ZA{n}.s. 39 // Load tile from "mem1". 40 %tile = vector.load %mem1[%c0] : memref<?xi32>, vector<[4]x[4]xi32> 42 // Transpose tile. 43 %transposed_tile = vector.transpose %tile, [1, 0] : vector<[4]x[4]xi32> to vector<[4]x[4]xi32> 45 // Dump the original tile. The smallest SVL is 128-bits so the tile will be at 48 // CHECK: TILE BEGIN 53 // CHECK: TILE END 54 vector.print str "TILE BEGI [all...] |
H A D | tile-fill.mlir | 8 // Integration test demonstrating filling a 32-bit element ZA tile with a 9 // non-zero constant via vector to tile (MOVA) ops. 12 // Fill a tile with '123'. This will get lowered to a 1-d vector splat of 13 // '123' and a loop that writes this vector to each tile slice in the ZA 14 // tile. 15 %tile = arith.constant dense<123> : vector<[4]x[4]xi32> 17 // Print the tile. The smallest SVL is 128-bits so the tile will be at least 20 // CHECK: TILE BEGIN 25 // CHECK: TILE EN [all...] |
H A D | vector-load-store.mlir | 13 // Integration tests demonstrating load/store to/from SME ZA tile. 46 // Dump "mem1". The smallest SVL is 128-bits so the tile will be at least 86 // Dump zeroed "mem2". The smallest SVL is 128-bits so the tile will be at 140 // Dump "mem2". The smallest SVL is 128-bits so the tile will be at least 156 // load and store are correctly preserved since the second tile is offset from 157 // the first tile. 168 // * the number of tile slices (1d vectors) in a 32-bit element tile. 176 // Fill memory that tile 1 will be loaded from with '1' and '2' for tile [all...] |
/llvm-project/mlir/include/mlir/Interfaces/ |
H A D | TilingInterface.td | 21 This interface allows operations to expose information needed to tile them. 25 the operation to be able to tile them. As a result an implementation of 26 the tiling algorithm (like `scf::tileUsingSCF`) can generate the inter-tile 28 tile any operation that implements the interface. 30 This interface is also meant to help with "tile and fuse", i.e. the process 33 b) Based on the tile of the producer used by the tiled consumer, 35 tile (and use it immediately in the consumer) 38 b) Based on the tile produced, materialize the tiled implementation of 39 a consumer that uses this tile. 40 Note that the tile an [all...] |
/llvm-project/mlir/test/Dialect/Linalg/ |
H A D | tile-indexed.mlir | 1 // RUN: mlir-opt %s -transform-interpreter -canonicalize -split-input-file | FileCheck %s -check-prefix=TILE-10n25 22 // TILE-10n25-DAG: [[$MAP:#[a-zA-Z0-9_]*]] = affine_map<(d0)[s0] -> (d0 + s0)> 23 // TILE-10n25-LABEL: func @indexed_vector 24 // TILE-10n25: %[[C10:.*]] = arith.constant 10 : index 25 // TILE-10n25: scf.for %[[J:.*]] = {{.*}} step %[[C10]] 26 // TILE-10n25: linalg.generic 27 // TILE-10n25: %[[I:.*]] = linalg.index 0 : index 28 // TILE-10n25: %[[NEW_I:.*]] = affine.apply [[$MAP]](%[[J]])[%[[I]]] 29 // TILE-10n25: linalg.yield %[[NEW_I]] : index 54 // TILE [all...] |
/llvm-project/mlir/include/mlir/Dialect/AMX/ |
H A D | AMX.td | 11 // The Intel Advanced Matrix Extensions (AMX) provide a tile matrix 12 // multiply unit (TMUL), a tile control register (TILECFG), and eight 13 // tile registers TMM0 through TMM7 (TILEDATA). 17 // of Intel AMX, such as configuration setup, tile sizes, instructions, 18 // and tile release. 44 The Intel Advanced Matrix Extensions (AMX) provide a tile matrix 45 multiply unit (TMUL), a tile control register (TILECFG), and eight 46 tile registers TMM0 through TMM7 (TILEDATA). 64 // AMX Tile definition. 76 def AMX_TileType : AMX_Type<"Tile", "til [all...] |
/llvm-project/mlir/include/mlir/Dialect/ArmSME/IR/ |
H A D | ArmSMEOps.td | 28 def ArmSMETileType : I32EnumAttr<"ArmSMETileType", "Arm SME tile type", 43 be assigned a tile ID, an i32 attribute, which specifies which virtual tile 45 type of the tile. This is summarized below: 47 | Tile Vector Types | Possible Tile IDs | 57 "Sets the tile ID for this operation.", 71 Returns the tile ID assigned to this operation. This will be null before 72 tile allocation. 84 "Returns the VectorType of the tile use [all...] |
/llvm-project/mlir/test/Dialect/MemRef/ |
H A D | normalize-memrefs.mlir | 167 #tile = affine_map < (i)->(i floordiv 4, i mod 4) > 174 func.func @multiple_argument_type(%A: memref<16xf64, #tile>, %B: f64, %C: memref<8xf64, #tile>, %D: memref<24xf64>) -> f64 { 175 %a = affine.load %A[0] : memref<16xf64, #tile> 177 affine.store %p, %A[10] : memref<16xf64, #tile> 178 call @single_argument_type(%C): (memref<8xf64, #tile>) -> () 191 func.func @single_argument_type(%C : memref<8xf64, #tile>) { 192 %a = memref.alloc(): memref<8xf64, #tile> 193 %b = memref.alloc(): memref<16xf64, #tile> 196 call @single_argument_type(%a): (memref<8xf64, #tile>) [all...] |
/llvm-project/mlir/include/mlir/Dialect/ArmSME/Utils/ |
H A D | Utils.h | 39 /// Returns true if `type` is a valid element type for an SME tile or false 43 /// Returns true if `vType` is a valid vector type for an SME tile or false 52 /// Returns the type of SME tile this vector type corresponds to, or none if the 53 /// vector type does not fit within an SME tile. 56 /// Verifies the tile ID (if set) on this tile operation is valid. 59 /// Generates a for loop over ZA tile slices where the induction variable is 60 /// the tile slice index and each iteration yields a new tile. Loop body is 61 /// built via `makeLoopBody`, which returns the next tile value. 66 /// Returns true if `vType` is a multiple of an SME tile size. Returns false if 67 /// the `vType` exactly matches the size of an SME tile. [all …]
|
/llvm-project/llvm/test/CodeGen/X86/AMX/ |
H A D | amx-combine.ll | 11 %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1) 26 %t2 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t1) 38 %t2 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t1) 52 %t2 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t1) 67 %t2 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t1) 69 %t3 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t2) 84 %t2 = call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> %t1) 86 %t3 = call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx %t2) 91 ; the shape is loaded after tile. 120 %b.tile.ptr = getelementptr inbounds i8, ptr %b, i64 64 [all …]
|
H A D | amx-fastconfig-spill.mir | 13 - { id: 2, class: tile } 16 - { id: 5, class: tile } 17 - { id: 6, class: tile } 18 - { id: 7, class: tile } 39 ; CHECK-NEXT: [[PTILEZEROV:%[0-9]+]]:tile = PTILEZEROV [[MOV16ri1]], [[MOV16ri]] 44 …; CHECK-NEXT: [[PTILELOADDV:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], [[LEA64r]], … 53 …; CHECK-NEXT: [[PTILELOADDV1:%[0-9]+]]:tile = PTILELOADDV %row, %col, [[LEA64r]], 1, [[MOV32ri64… 55 …; CHECK-NEXT: [[PTILELOADDV2:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.2, 1… 57 …; CHECK-NEXT: [[PTILELOADDV3:%[0-9]+]]:tile = PTILELOADDV [[MOV16ri1]], [[MOV16ri]], %stack.3, 1… 58 …; CHECK-NEXT: [[PTDPBSSDV:%[0-9]+]]:tile = PTDPBSSDV [[MOV16ri1]], [[MOV16ri]], [[MOV16ri]], kil… [all …]
|
/llvm-project/mlir/lib/Dialect/Affine/Transforms/ |
H A D | LoopTiling.cpp | 9 // This file implements a pass to tile loop nests. 40 #define DEBUG_TYPE "affine-loop-tile" 56 // Default tile size if nothing is provided. 59 // If true, tile sizes are set to avoid max/min in bounds if possible. 76 /// Reduces each tile size to the largest divisor of the corresponding trip 80 assert(band.size() == tileSizes->size() && "invalid tile size count"); in adjustToDivisorsOfTripCounts() 86 // Adjust the tile size to largest factor of the trip count less than in adjustToDivisorsOfTripCounts() 96 // Returns tile sizes to use. Checks CL options; if none are specified, sets it 98 // tile sizes assuming identity accesses / 1:1 tile siz [all...] |
/llvm-project/clang/test/CodeGen/X86/ |
H A D | amx_api.c | 15 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) in test_api() 16 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) in test_api() 38 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) in test_tile_loadd() 40 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) in test_tile_loadd() 46 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) in test_tile_stream_loadd() 48 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) in test_tile_stream_loadd() 54 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) in test_tile_dpbssd() 56 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile.to.vector.v256i32(x86_amx {{%.*}}) in test_tile_dpbssd() 62 //CHECK-DAG: call x86_amx @llvm.x86.cast.vector.to.tile.v256i32(<256 x i32> {{%.*}}) in test_tile_dpbsud() 64 //CHECK-DAG: call <256 x i32> @llvm.x86.cast.tile in test_tile_dpbsud() [all...] |
/llvm-project/mlir/lib/Dialect/ArmSME/Transforms/ |
H A D | TileAllocation.cpp | 16 // users to ensure the IR has been lowered to CF before invoking the tile 22 // Tile Overlaps 145 /// Allocates and returns a tile ID. Fails if there are no tiles left. in allocateTileId() 157 /// Acquires a specific tile ID. Asserts the tile is initially free. in releaseTileId() 161 "cannot acquire allocated tile!"); in releaseTileId() 165 /// Releases a previously allocated tile ID. in allocateInMemoryTileId() 169 "cannot release unallocated tile!"); in allocateInMemoryTileId() 173 /// Allocates an in-memory tile ID. 175 // Note: We never release in-memory tile ID [all...] |
/llvm-project/mlir/test/Dialect/ArmSME/ |
H A D | roundtrip.mlir | 163 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xi8>, vector<[16]x[16]xi8> 172 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16> 181 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xi32>, vector<[4]x[4]xi32> 190 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xi64>, vector<[2]x[2]xi64> 199 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xi128>, vector<[1]x[1]xi128> 208 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xf16>, vector<[8]x[8]xf16> 217 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xbf16>, vector<[8]x[8]xbf16> 226 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xf32>, vector<[4]x[4]xf32> 235 %tile = arm_sme.tile_load %src[%c0, %c0] : memref<?x?xf64>, vector<[2]x[2]xf64> 244 %tile [all...] |
/llvm-project/mlir/lib/Conversion/ArmSMEToLLVM/ |
H A D | ArmSMEToLLVM.cpp | 136 "expected tile ID to be allocated before conversion to LLVM"); 140 /// Creates an alloca matching the size of tile used by `tileOp`. The alloca is in createAllocaForTile() 149 // Create an alloca matching the tile size of the `tileOp`. in createAllocaForTile() 163 /// Finds or creates an alloca for a spill of a tile. in getOrCreateAllocaForTile() 188 /// hardware tile ID) to ArmSME intrinsics. Currently, this works by assigning 189 /// the op to tile 0, then emitting a full tile swap between ZA and memory 190 /// before + after the tile op. 194 /// // Note: <IN MEMORY TILE> = tile I [all...] |