xref: /llvm-project/mlir/test/Integration/Dialect/Linalg/CPU/runtime-verification.mlir (revision eb206e9ea84eff0a0596fed2de8316d924f946d1)
1// RUN: mlir-opt %s -generate-runtime-verification \
2// RUN: -one-shot-bufferize="bufferize-function-boundaries" \
3// RUN: -convert-linalg-to-loops \
4// RUN: -expand-strided-metadata \
5// RUN: -lower-affine \
6// RUN: -convert-scf-to-cf \
7// RUN: -test-cf-assert \
8// RUN: -convert-index-to-llvm \
9// RUN: -finalize-memref-to-llvm \
10// RUN: -convert-func-to-llvm \
11// RUN: -convert-arith-to-llvm \
12// RUN: -convert-cf-to-llvm \
13// RUN: -reconcile-unrealized-casts | \
14// RUN: mlir-runner -e main -entry-point-result=void \
15// RUN:     -shared-libs=%mlir_runner_utils \
16// RUN:     -shared-libs=%mlir_c_runner_utils 2>&1 | \
17// RUN: FileCheck %s
18
func.func @main() {
  // 1-D operands: build with static sizes, then erase the sizes with
  // tensor.cast so mismatches can only be caught by the runtime checks.
  %c5x = arith.constant dense<0.0> : tensor<5xf32>
  %c4x = arith.constant dense<0.0> : tensor<4xf32>
  %d5x = tensor.cast %c5x : tensor<5xf32> to tensor<?xf32>
  %d4x = tensor.cast %c4x : tensor<4xf32> to tensor<?xf32>

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @simple_add(%d5x, %d5x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @simple_add(%d5x, %d4x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @simple_add(%d4x, %d5x) : (tensor<?xf32>, tensor<?xf32>) -> (tensor<?xf32>)

  // 2-D operands for the broadcasting and matmul cases.
  %c1x1 = arith.constant dense<0.0> : tensor<1x1xf32>
  %c1x4 = arith.constant dense<0.0> : tensor<1x4xf32>
  %c4x4 = arith.constant dense<0.0> : tensor<4x4xf32>
  %c4x5 = arith.constant dense<0.0> : tensor<4x5xf32>
  %c5x4 = arith.constant dense<0.0> : tensor<5x4xf32>
  %d1x1 = tensor.cast %c1x1 : tensor<1x1xf32> to tensor<?x?xf32>
  %d1x4 = tensor.cast %c1x4 : tensor<1x4xf32> to tensor<?x?xf32>
  %d4x4 = tensor.cast %c4x4 : tensor<4x4xf32> to tensor<?x?xf32>
  %d4x5 = tensor.cast %c4x5 : tensor<4x5xf32> to tensor<?x?xf32>
  %d5x4 = tensor.cast %c5x4 : tensor<5x4xf32> to tensor<?x?xf32>

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d1x1, %d1x1) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d1x1, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @broadcast_add(%d4x4, %d1x4) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size
  func.call @broadcast_add(%d1x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // 5x4 + 4x5 has no broadcastable dimension, so several ops inside
  // @broadcast_add are flagged.
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #1 is incompatible with inferred dimension size
  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #1 of input/output operand #2 is incompatible with inferred dimension size
  func.call @broadcast_add(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @matmul_generic(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @matmul_generic(%d4x5, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @matmul_named(%d5x4, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.matmul
  // CHECK: ^ dimension #0 of input/output operand #1 is incompatible with inferred dimension size
  func.call @matmul_named(%d4x5, %d4x5) : (tensor<?x?xf32>, tensor<?x?xf32>) -> (tensor<?x?xf32>)

  // Statically-shaped operands for the convolution case.
  // (Renamed from %c64x57: the constant's actual shape is 16x29.)
  %c16x29 = arith.constant dense<0.0> : tensor<16x29xf32>
  %c3x4 = arith.constant dense<0.0> : tensor<3x4xf32>

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @conv(%c16x29, %c3x4) : (tensor<16x29xf32>, tensor<3x4xf32>) -> (tensor<5x7xf32>)

  // CHECK-NOT: ERROR: Runtime op verification failed
  func.call @reverse_from_3(%d4x) : (tensor<?xf32>) -> (tensor<?xf32>)

  // CHECK: ERROR: Runtime op verification failed
  // CHECK: linalg.generic
  // CHECK: unexpected negative result on dimension #0 of input/output operand #0
  func.call @reverse_from_3(%d5x) : (tensor<?xf32>) -> (tensor<?xf32>)

  return
}
106
107
#identity1D = affine_map<(d0) -> (d0)>

// Elementwise addition of two dynamically-sized 1-D tensors. The result is
// sized after the first operand; no shape check is performed here, so a
// mismatched second operand is caught only by the runtime-verification
// instrumentation.
func.func @simple_add(%lhs: tensor<?xf32>, %rhs: tensor<?xf32>) -> (tensor<?xf32>) {
  %idx0 = arith.constant 0 : index
  %len = tensor.dim %lhs, %idx0 : tensor<?xf32>
  %init = tensor.empty(%len) : tensor<?xf32>
  %sum = linalg.generic {
    indexing_maps = [#identity1D, #identity1D, #identity1D],
    iterator_types = ["parallel"]
  } ins(%lhs, %rhs : tensor<?xf32>, tensor<?xf32>)
    outs(%init : tensor<?xf32>) {
  ^bb0(%a: f32, %b: f32, %acc: f32):
    %s = arith.addf %a, %b : f32
    linalg.yield %s : f32
  } -> tensor<?xf32>
  return %sum : tensor<?xf32>
}
125
// #broadcastD0 always reads row 0 of its operand; #broadcastD1 always reads
// column 0. They implement the "stretch a size-1 dimension" half of
// numpy-style broadcasting.
#broadcastD0 = affine_map<(d0, d1) -> (0, d1)>
#broadcastD1 = affine_map<(d0, d1) -> (d0, 0)>
#identity2D = affine_map<(d0, d1) -> (d0, d1)>

// Numpy-style broadcasting addition of two dynamically-sized 2-D tensors:
// any dimension of size 1 is stretched to the maximum of the two operands'
// sizes for that dimension. Dimensions that differ and are both != 1 are NOT
// rejected here — the resulting mismatch is expected to be reported by the
// runtime-verification instrumentation (see the expected errors in @main).
func.func @broadcast_add(%arg0: tensor<?x?xf32>, %arg1: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // Calculate maximum dimension 0
  %c0 = arith.constant 0 : index
  %dim = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %dim_0 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
  %0 = arith.maxui %dim, %dim_0 : index

  // Calculate maximum dimension 1
  %c1 = arith.constant 1 : index
  %dim_1 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
  %dim_2 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
  %1 = arith.maxui %dim_1, %dim_2 : index

  // Broadcast dimension 0 of %arg0: if it is 1, replicate row 0 into a
  // tensor with %0 rows; otherwise pass %arg0 through unchanged.
  %dim_3 = tensor.dim %arg0, %c0 : tensor<?x?xf32>
  %2 = arith.cmpi eq, %dim_3, %c1 : index
  %3 = scf.if %2 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %arg0, %c1 : tensor<?x?xf32>
    %12 = tensor.empty(%0, %dim_7) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD0, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg0 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %arg0 : tensor<?x?xf32>
  }

  // Broadcast dimension 1 of %arg0 (same pattern, replicating column 0).
  %dim_4 = tensor.dim %3, %c1 : tensor<?x?xf32>
  %4 = arith.cmpi eq, %dim_4, %c1 : index
  %5 = scf.if %4 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %3, %c0 : tensor<?x?xf32>
    %12 = tensor.empty(%dim_7, %1) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD1, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%3 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %3 : tensor<?x?xf32>
  }

  // Broadcast dimension 0 of %arg1
  %dim_5 = tensor.dim %arg1, %c0 : tensor<?x?xf32>
  %6 = arith.cmpi eq, %dim_5, %c1 : index
  %7 = scf.if %6 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %arg1, %c1 : tensor<?x?xf32>
    %12 = tensor.empty(%0, %dim_7) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD0, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%arg1 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %arg1 : tensor<?x?xf32>
  }

  // Broadcast dimension 1 of %arg1
  %dim_6 = tensor.dim %7, %c1 : tensor<?x?xf32>
  %8 = arith.cmpi eq, %dim_6, %c1 : index
  %9 = scf.if %8 -> (tensor<?x?xf32>) {
    %dim_7 = tensor.dim %7, %c0 : tensor<?x?xf32>
    %12 = tensor.empty(%dim_7, %1) : tensor<?x?xf32>
    %13 = linalg.generic {
      indexing_maps = [#broadcastD1, #identity2D],
      iterator_types = ["parallel", "parallel"]
    } ins(%7 : tensor<?x?xf32>) outs(%12 : tensor<?x?xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<?x?xf32>
    scf.yield %13 : tensor<?x?xf32>
  } else {
    scf.yield %7 : tensor<?x?xf32>
  }

  // Perform element-wise computation on the (possibly) broadcasted operands.
  // If either operand still has a dimension != the max computed above, the
  // runtime-verification checks on this op fire.
  %10 = tensor.empty(%0, %1) : tensor<?x?xf32>
  %11 = linalg.generic {
    indexing_maps = [#identity2D, #identity2D, #identity2D],
    iterator_types = ["parallel", "parallel"]
  } ins(%5, %9 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%10 : tensor<?x?xf32>) {
  ^bb0(%in: f32, %in_7: f32, %out: f32):
    %12 = arith.addf %in, %in_7 : f32
    linalg.yield %12 : f32
  } -> tensor<?x?xf32>
  return %11 : tensor<?x?xf32>
}
227
// Classic (m, k) x (k, n) -> (m, n) matmul access pattern.
#matmul_accesses = [
  affine_map<(m, n, k) -> (m, k)>,
  affine_map<(m, n, k) -> (k, n)>,
  affine_map<(m, n, k) -> (m, n)>
]
#matmul_trait = {
  iterator_types = ["parallel", "parallel", "reduction"],
  indexing_maps = #matmul_accesses
}

// Matrix multiply written as a linalg.generic over a zero-initialized
// accumulator. The contraction (k) dimensions of the operands are not
// checked here; a mismatch is left for the runtime-verification pass.
func.func @matmul_generic(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %zero = arith.constant 0.0 : f32
  %idx0 = arith.constant 0 : index
  %idx1 = arith.constant 1 : index
  %rows = tensor.dim %lhs, %idx0 : tensor<?x?xf32>
  %cols = tensor.dim %rhs, %idx1 : tensor<?x?xf32>
  %acc_init = tensor.splat %zero[%rows, %cols] : tensor<?x?xf32>
  %prod = linalg.generic #matmul_trait
      ins(%lhs, %rhs : tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%acc_init : tensor<?x?xf32>) {
  ^bb0(%a: f32, %b: f32, %acc: f32):
    %mul = arith.mulf %a, %b : f32
    %fma = arith.addf %acc, %mul : f32
    linalg.yield %fma : f32
  } -> tensor<?x?xf32>
  return %prod : tensor<?x?xf32>
}
253
// Matrix multiply via the named linalg.matmul op, zero-initialized output.
// Like @matmul_generic, the contraction dimensions are left unchecked so the
// runtime-verification pass can flag mismatched operands.
func.func @matmul_named(%lhs: tensor<?x?xf32>, %rhs: tensor<?x?xf32>) -> tensor<?x?xf32> {
  %zero = arith.constant 0.0 : f32
  %idx0 = arith.constant 0 : index
  %idx1 = arith.constant 1 : index
  %rows = tensor.dim %lhs, %idx0 : tensor<?x?xf32>
  %cols = tensor.dim %rhs, %idx1 : tensor<?x?xf32>
  %acc = tensor.splat %zero[%rows, %cols] : tensor<?x?xf32>
  %prod = linalg.matmul ins(%lhs, %rhs : tensor<?x?xf32>, tensor<?x?xf32>) outs(%acc : tensor<?x?xf32>) -> tensor<?x?xf32>
  return %prod : tensor<?x?xf32>
}
264
// Strided 2-D "convolution" access pattern: the first map reads the input at
// (d0 * 3 + d2, d1 * 4 + d3) — a 3x4 window (d2, d3) swept with strides
// (3, 4); the second map addresses the kernel and the third the output.
#conv_trait = {
  indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 * 3 + d2, d1 * 4 + d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
  iterator_types = ["parallel", "parallel", "reduction", "reduction"]
}

// Convolution whose accesses stay in bounds for these static shapes: the
// largest input index is (4 * 3 + 2, 6 * 4 + 3) = (14, 27), inside 16x29,
// so the runtime-verification pass reports nothing for this op.
func.func @conv(%arg0: tensor<16x29xf32>, %arg1: tensor<3x4xf32>) -> (tensor<5x7xf32>) {
  %c0 = arith.constant 0.0 : f32
  // Zero-initialized accumulator for the multiply-add reduction.
  %splat = tensor.splat %c0 : tensor<5x7xf32>
  %result = linalg.generic #conv_trait ins(%arg0, %arg1 : tensor<16x29xf32>, tensor<3x4xf32>) outs(%splat : tensor<5x7xf32>) {
  ^bb0(%in: f32, %in_64: f32, %out: f32):
    %5 = arith.mulf %in, %in_64 : f32
    %6 = arith.addf %out, %5 : f32
    linalg.yield %6 : f32
  } -> tensor<5x7xf32>
  return %result : tensor<5x7xf32>
}
281
// Reads the input at index (3 - i) while writing output index i, i.e. it
// reverses the first four elements.
#reverse_trait = {
  indexing_maps = [
    affine_map<(i) -> (3 - i)>,
    affine_map<(i) -> (i)>
  ],
  iterator_types = ["parallel"]
}

// Copies element (3 - i) of the input to element i of the output. For any
// input longer than four elements the read index becomes negative (i = 4
// reads index -1), which the runtime-verification pass reports.
func.func @reverse_from_3(%src: tensor<?xf32>) -> (tensor<?xf32>) {
  %zero = arith.constant 0.0 : f32
  %idx0 = arith.constant 0 : index
  %len = tensor.dim %src, %idx0 : tensor<?xf32>
  %dst = tensor.splat %zero[%len] : tensor<?xf32>
  %rev = linalg.generic #reverse_trait ins(%src : tensor<?xf32>) outs(%dst : tensor<?xf32>) {
  ^bb0(%in: f32, %out: f32):
    linalg.yield %in : f32
  } -> tensor<?xf32>
  return %rev : tensor<?xf32>
}
301