// RUN: mlir-opt %s -generate-runtime-verification \
// RUN:     -one-shot-bufferize="bufferize-function-boundaries" \
// RUN:     -convert-linalg-to-loops \
// RUN:     -expand-strided-metadata \
// RUN:     -lower-affine \
// RUN:     -convert-scf-to-cf \
// RUN:     -test-cf-assert \
// RUN:     -convert-index-to-llvm \
// RUN:     -finalize-memref-to-llvm \
// RUN:     -convert-func-to-llvm \
// RUN:     -convert-arith-to-llvm \
// RUN:     -convert-cf-to-llvm \
// RUN:     -reconcile-unrealized-casts | \
// RUN: mlir-runner -e main -entry-point-result=void \
// RUN:     -shared-libs=%mlir_runner_utils \
// RUN:     -shared-libs=%mlir_c_runner_utils 2>&1 | \
// RUN: FileCheck %s

func.func @main() {
  %c5x = arith.constant dense<0.0> : tensor<5xf32>
  %c4x = arith.constant dense<0.0> : tensor<4xf32>
  %d5x = tensor.cast %c5x : tensor<5xf32> to tensor<?xf32>
  %d4x = tensor.cast %c4x : tensor<4xf32> to tensor<?xf32>

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @simple_add(%d5x, %d5x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @simple_add(%d5x, %d4x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @simple_add(%d4x, %d5x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  %c1x1 = arith.constant dense<0.0> : tensor<1x1xf32>
  %c1x4 = arith.constant dense<0.0> : tensor<1x4xf32>
  %c4x4 = arith.constant dense<0.0> : tensor<4x4xf32>
  %c4x5 = arith.constant dense<0.0> : tensor<4x5xf32>
  %c5x4 = arith.constant dense<0.0> : tensor<5x4xf32>
  %d1x1 = tensor.cast %c1x1 : tensor<1x1xf32> to tensor<?x?xf32>
  %d1x4 = tensor.cast %c1x4 : tensor<1x4xf32> to tensor<?x?xf32>
  %d4x4 = tensor.cast %c4x4 : tensor<4x4xf32> to tensor<?x?xf32>
  %d4x5 = tensor.cast %c4x5 : tensor<4x5xf32> to tensor<?x?xf32>
  %d5x4 = tensor.cast %c5x4 : tensor<5x4xf32> to tensor<?x?xf32>

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d1x1, %d1x1) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d1x1, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d4x4, %d1x4) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size
  func.call @broadcast_add(%d1x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size
  func.call @broadcast_add(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @matmul_generic(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @matmul_generic(%d4x5, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @matmul_named(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.matmul
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @matmul_named(%d4x5, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  %c16x29 = arith.constant dense<0.0> : tensor<16x29xf32>
  %c3x4 = arith.constant dense<0.0> : tensor<3x4xf32>

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @conv(%c16x29, %c3x4) : (tensor<16x29xf32>, tensor<3x4xf32>) -> (tensor<5x7xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @reverse_from_3(%d4x) : (tensor<?xf32>) -> (tensor<?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: unexpected negative result on dimension #0 of input/output operand #0
  func.call @reverse_from_3(%d5x) : (tensor<?xf32>) -> (tensor<?xf32>)

  return
}
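
// Elementwise addition of two dynamically sized 1-D tensors. All three
// indexing maps are the identity map, so runtime verification reports an
// error as soon as one operand's size disagrees with the size inferred
// from the other operands.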
#identity1D = affine_map<(d0) -> (d0)>

func.func @simple_add(%arg0: tensor<?xf32>, %arg1: tensor<?xf32>) -> (tensor<?xf32>) {
  %c0 = arith.constant 0 : index
  %dim = tensor.dim %arg0, %c0 : tensor<?xf32>
  %result = tensor.empty(%dim) : tensor<?xf32>
  %0 = linalg.generic {
    indexing_maps = [#identity1D, #identity1D, #identity1D],
    iterator_types = ["parallel"]
  } ins(%arg0, %arg1 : tensor<?xf32>, tensor<?xf32>)
    outs(%result : tensor<?xf32>) {
  ^bb0(%gen_arg1: f32, %gen_arg2: f32, %out: f32):
    %tmp1 = arith.addf %gen_arg1, %gen_arg2 : f32
    linalg.yield %tmp1 : f32
  } -> tensor<?xf32>
  return %0 : tensor<?xf32>
}
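
// Numpy-style broadcasting addition of two dynamically sized 2-D tensors:
// every unit dimension of an operand is first expanded to the maximum size
// of that dimension across both operands. Non-unit dimensions that still
// disagree after broadcasting are reported by runtime verification.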
#broadcastD0 = affine_map<(d0, d1) -> (0, d1)>
#broadcastD1 = affine_map<(d0, d1) -> (d0, 0)>
#identity2D = affine_map<(d0, d1) -> (d0, d1)>

func.func @broadcast_add(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // Calculate maximum dimension 0
  %c0 = arith.constant 0 : index
  %dim = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %dim_0 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
  %0 = arith.maxui %dim, %dim_0 : index

  // Calculate maximum dimension 1
  %c1 = arith.constant 1 : index
  %dim_1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  %dim_2 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
  %1 = arith.maxui %dim_1, %dim_2 : index

  // Broadcast dimension 0 of %arg0
  %dim_3 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %2 = arith.cmpi eq, %dim_3, %c1 : index
  %3 = scf.if %2 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
    %12 = tensor.empty(%0, %dim_7) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD0, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg0 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %arg0 : tensor<?x?xf32>
  }

  // Broadcast dimension 1 of %arg0
  %dim_4 = tensor.dim %3, %c1 : tensor<?x?xf32>
  %4 = arith.cmpi eq, %dim_4, %c1 : index
  %5 = scf.if %4 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %3, %c0 : tensor<?x?xf32>
    %12 = tensor.empty(%dim_7, %1) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD1, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%3 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %3 : tensor<?x?xf32>
  }

  // Broadcast dimension 0 of %arg1
  %dim_5 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
  %6 = arith.cmpi eq, %dim_5, %c1 : index
  %7 = scf.if %6 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
    %12 = tensor.empty(%0, %dim_7) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD0, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg1 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %arg1 : tensor<?x?xf32>
  }

  // Broadcast dimension 1 of %arg1
  %dim_6 = tensor.dim %7, %c1 : tensor<?x?xf32>
  %8 = arith.cmpi eq, %dim_6, %c1 : index
  %9 = scf.if %8 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %7, %c0 : tensor<?x?xf32>
    %12 = tensor.empty(%dim_7, %1) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD1, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%7 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %7 : tensor<?x?xf32>
  }

  // Perform element-wise computation
  %10 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %11 = linalg.generic {
    indexing_maps = [#identity2D, #identity2D, #identity2D],
    iterator_types = ["parallel", "parallel"]
  } ins(%5, %9 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%10 : tensor<?x?xf32>) {
  ^bb0(%in: f32, %in_7: f32, %out: f32):
    %12 = arith.addf %in, %in_7 : f32
    linalg.yield %12 : f32
  } -> tensor<?x?xf32>
  return %11 : tensor<?x?xf32>
}
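
// Matrix multiplication, first written as a linalg.generic with explicit
// indexing maps and further below as the named op linalg.matmul. The
// reduction dimension k couples dimension #1 of the first operand to
// dimension #0 of the second, so operands whose shared dimension differs
// trigger a runtime verification error.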
#matmul_accesses = [
  affine_map<(m, n, k) -> (m, k)>,
  affine_map<(m, n, k) -> (k, n)>,
  affine_map<(m, n, k) -> (m, n)>
]
#matmul_trait = {
  iterator_types = ["parallel", "parallel", "reduction"],
  indexing_maps = #matmul_accesses
}

func.func @matmul_generic(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %cf0 = arith.constant 0.0 : f32
  %ci0 = arith.constant 0 : index
  %ci1 = arith.constant 1 : index
  %d0 = tensor.dim %arg0, %ci0 : tensor<?x?xf32>
  %d1 = tensor.dim %arg1, %ci1 : tensor<?x?xf32>
  %splat = tensor.splat %cf0[%d0, %d1] : tensor<?x?xf32>
  %0 = linalg.generic #matmul_trait ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%splat : tensor<?x?xf32>) {
  ^bb0(%in: f32, %in_0: f32, %out: f32):
    %1 = arith.mulf %in, %in_0 : f32
    %2 = arith.addf %out, %1 : f32
    linalg.yield %2 : f32
  } -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

func.func @matmul_named(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %cf0 = arith.constant 0.0 : f32
  %ci0 = arith.constant 0 : index
  %ci1 = arith.constant 1 : index
  %d0 = tensor.dim %arg0, %ci0 : tensor<?x?xf32>
  %d1 = tensor.dim %arg1, %ci1 : tensor<?x?xf32>
  %splat = tensor.splat %cf0[%d0, %d1] : tensor<?x?xf32>
  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%splat : tensor<?x?xf32>) -> tensor<?x?xf32>
  return %0 : tensor<?x?xf32>
}

// Strided 2-D convolution: for each output element, the input map
// (d0 * 3 + d2, d1 * 4 + d3) reads a 3x4 window of the input with strides
// 3 and 4. The 16x29 input is large enough for every access, so no error
// is expected.
#conv_trait = {
  indexing_maps = [
    affine_map<(d0, d1, d2, d3) -> (d0 * 3 + d2, d1 * 4 + d3)>,
    affine_map<(d0, d1, d2, d3) -> (d2, d3)>,
    affine_map<(d0, d1, d2, d3) -> (d0, d1)>
  ],
  iterator_types = ["parallel", "parallel", "reduction", "reduction"]
}

func.func @conv(%arg0: tensor<16x29xf32>, %arg1: tensor<3x4xf32>) -> (tensor<5x7xf32>) {
  %cf0 = arith.constant 0.0 : f32
  %splat = tensor.splat %cf0 : tensor<5x7xf32>
  %result = linalg.generic #conv_trait ins(%arg0, %arg1 : tensor<16x29xf32>, tensor<3x4xf32>) outs(%splat : tensor<5x7xf32>) {
  ^bb0(%in: f32, %in_64: f32, %out: f32):
    %5 = arith.mulf %in, %in_64 : f32
    %6 = arith.addf %out, %5 : f32
    linalg.yield %6 : f32
  } -> tensor<5x7xf32>
  return %result : tensor<5x7xf32>
}

// Reverses a length-4 tensor through the indexing map 3 - i. For any longer
// input the map evaluates to a negative index, which runtime verification
// reports.
#reverse_trait = {
  indexing_maps = [
    affine_map<(i) -> (3 - i)>,
    affine_map<(i) -> (i)>
  ],
  iterator_types = ["parallel"]
}

func.func @reverse_from_3(%arg0: tensor<?xf32>) -> (tensor<?xf32>) {
  %cf0 = arith.constant 0.0 : f32
  %ci0 = arith.constant 0 : index
  %d0 = tensor.dim %arg0, %ci0 : tensor<?xf32>
  %splat = tensor.splat %cf0[%d0] : tensor<?xf32>
  %result = linalg.generic #reverse_trait ins(%arg0 : tensor<?xf32>) outs(%splat : tensor<?xf32>) {
  ^bb0(%a: f32, %b: f32):
    linalg.yield %a : f32
  } -> tensor<?xf32>
  return %result : tensor<?xf32>
}