// Integration test for the runtime verification of linalg structured ops:
// each callee below is built with dynamic shapes, and @main feeds it
// combinations of statically-known shapes (hidden behind tensor.cast) that
// either satisfy or violate the op's inferred dimension constraints. The
// -generate-runtime-verification pass inserts cf.assert checks which
// -test-cf-assert turns into the "ERROR: Runtime op verification failed"
// diagnostics matched by FileCheck.
//
// RUN: mlir-opt %s -generate-runtime-verification \
// RUN: -one-shot-bufferize="bufferize-function-boundaries" \
// RUN: -convert-linalg-to-loops \
// RUN: -expand-strided-metadata \
// RUN: -lower-affine \
// RUN: -convert-scf-to-cf \
// RUN: -test-cf-assert \
// RUN: -convert-index-to-llvm \
// RUN: -finalize-memref-to-llvm \
// RUN: -convert-func-to-llvm \
// RUN: -convert-arith-to-llvm \
// RUN: -convert-cf-to-llvm \
// RUN: -reconcile-unrealized-casts | \
// RUN: mlir-runner -e main -entry-point-result=void \
// RUN: -shared-libs=%mlir_runner_utils \
// RUN: -shared-libs=%mlir_c_runner_utils 2>&1 | \
// RUN: FileCheck %s

func.func @main() {
  %c5x = arith.constant dense<0.0> : tensor<5xf32>
  %c4x = arith.constant dense<0.0> : tensor<4xf32>
  %d5x = tensor.cast %c5x : tensor<5xf32> to tensor<?xf32>
  %d4x = tensor.cast %c4x : tensor<4xf32> to tensor<?xf32>

  // Matching sizes: no runtime error expected.
  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @simple_add(%d5x, %d5x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  // Mismatched sizes (5 vs. 4): operand #1 fails the inferred-size check.
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @simple_add(%d5x, %d4x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  // Mismatched sizes the other way around (4 vs. 5).
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @simple_add(%d4x, %d5x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  %c1x1 = arith.constant dense<0.0> : tensor<1x1xf32>
  %c1x4 = arith.constant dense<0.0> : tensor<1x4xf32>
  %c4x4 = arith.constant dense<0.0> : tensor<4x4xf32>
  %c4x5 = arith.constant dense<0.0> : tensor<4x5xf32>
  %c5x4 = arith.constant dense<0.0> : tensor<5x4xf32>
  %d1x1 = tensor.cast %c1x1 : tensor<1x1xf32> to tensor<?x?xf32>
  %d1x4 = tensor.cast %c1x4 : tensor<1x4xf32> to tensor<?x?xf32>
  %d4x4 = tensor.cast %c4x4 : tensor<4x4xf32> to tensor<?x?xf32>
  %d4x5 = tensor.cast %c4x5 : tensor<4x5xf32> to tensor<?x?xf32>
  %d5x4 = tensor.cast %c5x4 : tensor<5x4xf32> to tensor<?x?xf32>

  // Broadcast-compatible shape combinations: no runtime error expected.
  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d1x1, %d1x1) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d1x1, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d4x4, %d1x4) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // 1x4 + 4x5: dimension 1 (4 vs. 5) is not broadcastable.
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size
  func.call @broadcast_add(%d1x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // 5x4 + 4x5: neither dimension matches, so multiple checks fire.
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size
  func.call @broadcast_add(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // Valid matmul shapes (5x4 * 4x5): no runtime error expected.
  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @matmul_generic(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // Contraction dimension mismatch (4x5 * 4x5).
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @matmul_generic(%d4x5, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // Same pair of cases through the named linalg.matmul op.
  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @matmul_named(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.matmul
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @matmul_named(%d4x5, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  %c16x29 = arith.constant dense<0.0> : tensor<16x29xf32>
  %c3x4 = arith.constant dense<0.0> : tensor<3x4xf32>

  // Strided/dilated access pattern that stays in bounds: no error expected.
  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @conv(%c16x29, %c3x4) : (tensor<16x29xf32>, tensor<3x4xf32>) -> (tensor<5x7xf32>)

  // Reverse map (3 - i) is in bounds for size 4 (indices 3..0).
  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @reverse_from_3(%d4x) : (tensor<?xf32>) -> (tensor<?xf32>)

  // For size 5, i = 4 yields index -1: negative access is reported.
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: unexpected negative result on dimension #0 of input/output operand #0
  func.call @reverse_from_3(%d5x) : (tensor<?xf32>) -> (tensor<?xf32>)

  return
}


#identity1D = affine_map<(d0) -> (d0)>

// Element-wise addition of two 1-D tensors; both inputs and the output use
// the identity map, so all three must agree on dimension 0 at runtime.
func.func @simple_add(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> (tensor<?xf32>) {
  %c0 = arith.constant 0 : index
  %dim = tensor.dim %arg0, %c0 : tensor<?xf32>
  %result = tensor.empty(%dim) : tensor<?xf32>
  %0 = linalg.generic {
    indexing_maps = [#identity1D, #identity1D, #identity1D],
    iterator_types = ["parallel"]
  } ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
    outs(%result : tensor<?xf32>) {
  ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32) :
    %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32
    linalg.yield %tmp1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}

#broadcastD0 = affine_map<(d0, d1) -> (0, d1)>
#broadcastD1 = affine_map<(d0, d1) -> (d0, 0)>
#identity2D = affine_map<(d0, d1) -> (d0, d1)>

// Numpy-style broadcasting add: size-1 dimensions of either operand are
// expanded (via a broadcasting linalg.generic) to the maximum extent before
// the element-wise sum. Incompatible non-1 extents are only caught by the
// runtime verification inserted on the final linalg.generic.
func.func @broadcast_add(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // Calculate maximum dimension 0
  %c0 = arith.constant 0 : index
  %dim = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %dim_0 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
  %0 = arith.maxui %dim, %dim_0 : index

  // Calculate maximum dimension 1
  %c1 = arith.constant 1 : index
  %dim_1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  %dim_2 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
  %1 = arith.maxui %dim_1, %dim_2 : index

  // Broadcast dimension 0 of %arg0
  %dim_3 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %2 = arith.cmpi eq, %dim_3, %c1 : index
  %3 = scf.if %2 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
    %12 = tensor.empty(%0, %dim_7) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD0, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg0 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %arg0 : tensor<?x?xf32>
  }

  // Broadcast dimension 1 of %arg0
  %dim_4 = tensor.dim %3, %c1 : tensor<?x?xf32>
  %4 = arith.cmpi eq, %dim_4, %c1 : index
  %5 = scf.if %4 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %3, %c0 : tensor<?x?xf32>
    %12 = tensor.empty(%dim_7, %1) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD1, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%3 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %3 : tensor<?x?xf32>
  }

  // Broadcast dimension 0 of %arg1
  %dim_5 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
  %6 = arith.cmpi eq, %dim_5, %c1 : index
  %7 = scf.if %6 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
    %12 = tensor.empty(%0, %dim_7) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD0, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg1 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %arg1 : tensor<?x?xf32>
  }

  // Broadcast dimension 1 of %arg1
  %dim_6 = tensor.dim %7, %c1 : tensor<?x?xf32>
  %8 = arith.cmpi eq, %dim_6, %c1 : index
  %9 = scf.if %8 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %7, %c0 : tensor<?x?xf32>
    %12 = tensor.empty(%dim_7, %1) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD1, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%7 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %7 : tensor<?x?xf32>
  }

  // Perform element-wise computation
  %10 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %11 = linalg.generic {
    indexing_maps = [#identity2D, #identity2D, #identity2D],
    iterator_types = ["parallel", "parallel"]
  } ins(%5, %9 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%10 : tensor<?x?xf32>) {
  ^bb0(%in: f32, %in_7: f32, %out: f32):
    %12 = arith.addf %in, %in_7 : f32
    linalg.yield %12 : f32
  } -> tensor<?x?xf32>
  return %11 : tensor<?x?xf32>
}

#matmul_accesses = [
  affine_map<(m, n, k) -> (m, k)>,
  affine_map<(m, n, k) -> (k, n)>,
  affine_map<(m, n, k) -> (m, n)>
]
#matmul_trait = {
  iterator_types = ["parallel", "parallel", "reduction"],
  indexing_maps = #matmul_accesses
}

// Matmul written as a linalg.generic with the standard matmul indexing maps;
// the shared reduction dimension k couples %arg0's dim 1 to %arg1's dim 0.
func.func @matmul_generic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %cf0 = arith.constant 0.0 : f32
  %ci0 = arith.constant 0 : index
  %ci1 = arith.constant 1 : index
  %d0 = tensor.dim %arg0, %ci0 : tensor<?x?xf32>
  %d1 = tensor.dim %arg1, %ci1 : tensor<?x?xf32>
  %splat = tensor.splat %cf0[%d0, %d1] : tensor<?x?xf32>
  %0 = linalg.generic #matmul_trait ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%splat : tensor<?x?xf32>) {
  ^bb0(%in: f32, %in_0: f32, %out: f32):
    %1 = arith.mulf %in, %in_0 : f32
    %2 = arith.addf %out, %1 : f32
    linalg.yield %2 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// Same computation via the named linalg.matmul op, so the runtime error
// message references "linalg.matmul" instead of "linalg.generic".
func.func @matmul_named(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %cf0 = arith.constant 0.0 : f32
  %ci0 = arith.constant 0 : index
  %ci1 = arith.constant 1 : index
  %d0 = tensor.dim %arg0, %ci0 : tensor<?x?xf32>
  %d1 = tensor.dim %arg1, %ci1 : tensor<?x?xf32>
  %splat = tensor.splat %cf0[%d0, %d1] : tensor<?x?xf32>
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%splat : tensor<?x?xf32>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

#conv_trait = {
  indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 * 3 + d2, d1 * 4 + d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
  iterator_types = ["parallel", "parallel", "reduction", "reduction"]
}

// Strided 2-D convolution (strides 3 and 4). For a 5x7 output and a 3x4
// kernel the accessed input indices reach at most (4*3+2, 6*4+3) = (14, 27),
// which fits inside the 16x29 input, so no verification error fires.
func.func @conv(%arg0: tensor<16x29xf32>, %arg1: tensor<3x4xf32>) -> (tensor<5x7xf32>) {
  %c0 = arith.constant 0.0 : f32
  %splat = tensor.splat %c0 : tensor<5x7xf32>
  %result = linalg.generic #conv_trait ins(%arg0, %arg1 : tensor<16x29xf32>, tensor<3x4xf32>) outs(%splat : tensor<5x7xf32>) {
  ^bb0(%in: f32, %in_64: f32, %out: f32):
    %5 = arith.mulf %in, %in_64 : f32
    %6 = arith.addf %out, %5 : f32
    linalg.yield %6 : f32
  } -> tensor<5x7xf32>
  return %result : tensor<5x7xf32>
}

#reverse_trait = {
  indexing_maps = [
    affine_map<(i) -> (3 - i)>,
    affine_map<(i) -> (i)>
  ],
  iterator_types = ["parallel"]
}

// Reads the input backwards starting at index 3; inputs longer than 4
// elements make the access map produce a negative index at runtime.
func.func @reverse_from_3(%arg0: tensor<?xf32>) -> (tensor<?xf32>) {
  %cf0 = arith.constant 0.0 : f32
  %ci0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %ci0 : tensor<?xf32>
  %splat = tensor.splat %cf0[%d0] : tensor<?xf32>
  %result = linalg.generic #reverse_trait ins(%arg0: tensor<?xf32>) outs(%splat: tensor<?xf32>) {
  ^bb0(%a: f32, %b: f32):
    linalg.yield %a : f32
  } -> tensor<?xf32>
  return %result : tensor<?xf32>
}