xref: /llvm-project/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir (revision e4384149b58f7c3d19c5d38bc46038c660b77ca9)
1// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s
2
// Test case: a statically shaped linalg.matmul on tensors.
// The FileCheck directives below bind the three function arguments to A, B
// and C and then require, in order: one vector.transfer_read per argument,
// a vector.contract consuming the three read values, and a
// vector.transfer_write of the contraction result into C.
3// CHECK-LABEL: @vectorize_matmul
4// CHECK-SAME: %[[A:.*]]: tensor<24x12xf32>
5// CHECK-SAME: %[[B:.*]]: tensor<12x25xf32>
6// CHECK-SAME: %[[C:.*]]: tensor<24x25xf32>
7func.func @vectorize_matmul(%arg0: tensor<24x12xf32>,
8                            %arg1: tensor<12x25xf32>,
9                            %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
10  // CHECK: %[[vA:.+]] = vector.transfer_read %[[A]]
11  // CHECK: %[[vB:.+]] = vector.transfer_read %[[B]]
12  // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]]
13  // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]]
14  // CHECK: vector.transfer_write %[[vR]], %[[C]]
  // Payload op that the transform script in this chunk matches by name.
15  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
16  func.return %0 : tensor<24x25xf32>
17}
18
// Transform script for @vectorize_matmul. The RUN line passes
// -split-input-file, so this script is presumably interpreted only against
// the payload in the same chunk (confirm against the mlir-opt docs).
19module attributes {transform.with_named_sequence} {
20  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    // %0: handle to the linalg.matmul ops found under the root handle %arg1.
21    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
    // %1: parent of the matched op, selected with the isolated_from_above
    // attribute; this is the handle that gets vectorized.
22    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
    // The result handle %2 is not used afterwards.
23    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
24    transform.yield
25  }
26}
27
28// -----
29
// Test case: the same static matmul shapes as the tensor variant, but with
// memref operands and no function result. The directives require the same
// sequence: three vector.transfer_read ops (one per argument), a
// vector.contract on the read values, and a vector.transfer_write into C.
30// CHECK-LABEL: @vectorize_matmul_memref
31// CHECK-SAME: %[[A:.*]]: memref<24x12xf32>
32// CHECK-SAME: %[[B:.*]]: memref<12x25xf32>
33// CHECK-SAME: %[[C:.*]]: memref<24x25xf32>
34func.func @vectorize_matmul_memref(%arg0: memref<24x12xf32>,
35                                   %arg1: memref<12x25xf32>,
36                                   %arg2: memref<24x25xf32>) {
37  // CHECK: %[[vA:.+]] = vector.transfer_read %[[A]]
38  // CHECK: %[[vB:.+]] = vector.transfer_read %[[B]]
39  // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]]
40  // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]]
41  // CHECK: vector.transfer_write %[[vR]], %[[C]]
  // Memref form of the op: it has no SSA result and writes into %arg2.
42  linalg.matmul ins(%arg0, %arg1 : memref<24x12xf32>, memref<12x25xf32>) outs(%arg2 : memref<24x25xf32>)
43  return
44}
45
// Transform script for @vectorize_matmul_memref. It performs the same three
// steps as the tensor matmul case: match linalg.matmul, take the parent
// selected with isolated_from_above, and vectorize that parent's children.
46module attributes {transform.with_named_sequence} {
47  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
48    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
49    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
    // No extra attributes are set on the vectorization op; %2 is unused.
50    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
51    transform.yield
52  }
53}
54
55// -----
56
// Test case: linalg.copy between two memref<100x100xf32> buffers.
// The directives require a vector.transfer_read from the first argument (A)
// followed by a vector.transfer_write of that value into the second (B).
57// CHECK-LABEL: @vectorize_copy_memref
58// CHECK-SAME: %[[A:.*]]: memref<100x100xf32>,
59// CHECK-SAME: %[[B:.*]]: memref<100x100xf32>
60func.func @vectorize_copy_memref(%arg0: memref<100x100xf32>,
61                                 %arg1: memref<100x100xf32>) {
62  // CHECK: %[[vA:.+]] = vector.transfer_read %[[A]]
63  // CHECK: vector.transfer_write %[[vA]], %[[B]]
  // Source is %arg0 and destination is %arg1.
64  linalg.copy ins(%arg0 : memref<100x100xf32>) outs(%arg1 : memref<100x100xf32>)
65  return
66}
67
// Transform script for @vectorize_copy_memref. It has the same structure as
// the matmul scripts in this file, except that the matcher targets
// linalg.copy instead of linalg.matmul.
68module attributes {transform.with_named_sequence} {
69  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    // %0: handle to the linalg.copy ops under the root handle %arg1.
70    %0 = transform.structured.match ops{["linalg.copy"]} in %arg1 : (!transform.any_op) -> !transform.any_op
71    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
72    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
73    transform.yield
74  }
75}
76
77// -----
78
// Test case: a 4x7 by 7x5 matmul whose two inputs are produced by tensor.pad
// ops. The transform script paired with this function does not set
// vectorize_padding. The directives require both tensor.pad ops to remain in
// the output and the A/B transfer reads to consume the pad results.
//
// #map0 lists two result expressions, (12 - s0) and 7; it is used with
// affine.min below. #map1 computes (7 - s0).
79#map0 = affine_map<()[s0] -> (-s0 + 12, 7)>
80#map1 = affine_map<()[s0] -> (-s0 + 7)>
81
82// CHECK-LABEL: @vectorize_keep_pad
83// CHECK-SAME: %[[C:[a-zA-Z0-9_]+]]: tensor<24x25xf32>
84func.func @vectorize_keep_pad(
85    %arg0: tensor<24x12xf32>, %arg1: tensor<12x25xf32>,
86    %arg2: tensor<24x25xf32>, %arg3: index, %arg4: index,
87    %arg5: index) -> tensor<24x25xf32> {
88  %c0 = arith.constant 0 : index
89  %cst = arith.constant 0.000000e+00 : f32
  // %0 = min(12 - %arg5, 7): the dynamic extent of both input slices.
90  %0 = affine.min #map0()[%arg5]
  // Slices of the inputs (dynamic in one dimension) and of the output (static).
91  %1 = tensor.extract_slice %arg0[%arg3, %arg5] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
92  %2 = tensor.extract_slice %arg1[%arg5, %arg4] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
93  %3 = tensor.extract_slice %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
  // %4 = 7 - %0: high padding that brings the dynamic dimension up to 7.
94  %4 = affine.apply #map1()[%0]
95  // CHECK: %[[pA:.*]] = tensor.pad
96  %5 = tensor.pad %1 nofold low[%c0, %c0] high[%c0, %4] {
97  ^bb0(%arg6: index, %arg7: index):
98    tensor.yield %cst : f32
99  } : tensor<4x?xf32> to tensor<4x7xf32>
  // %6 is the same amount as %4, applied to the first dimension of %2.
100  %6 = affine.apply #map1()[%0]
101  // CHECK: %[[pB:.*]] = tensor.pad
102  %7 = tensor.pad %2 nofold low[%c0, %c0] high[%6, %c0] {
103  ^bb0(%arg6: index, %arg7: index):
104    tensor.yield %cst : f32
105  } : tensor<?x5xf32> to tensor<7x5xf32>
106  // CHECK: %[[vA:.+]] = vector.transfer_read %[[pA]]
107  // CHECK: %[[vB:.+]] = vector.transfer_read %[[pB]]
108  // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]]
109  // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]]
110  // CHECK: vector.transfer_write %[[vR]], %[[C]]
  // NOTE(review): the directives read and write the function argument C
  // directly, although the matmul here takes the slice %3 as its output and
  // the result is inserted back with tensor.insert_slice. Presumably the
  // applied patterns fold the slice ops into the transfers; confirm.
111  %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
112  %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
113  return %9 : tensor<24x25xf32>
114}
115
// Transform script for @vectorize_keep_pad. The vectorization op carries no
// vectorize_padding attribute here, unlike the script for @vectorize_pad.
116module attributes {transform.with_named_sequence} {
117  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
118    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
119    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
120    %2 = transform.structured.vectorize_children_and_apply_patterns %1 : (!transform.any_op) -> !transform.any_op
121    transform.yield
122  }
123}
124
125// -----
126
// Test case: the same payload IR as @vectorize_keep_pad, but the transform
// script paired with it sets vectorize_padding. The directives require the
// A/B transfer reads to consume the tensor.extract_slice results (sA, sB)
// rather than the results of the tensor.pad ops.
// NOTE(review): no directive explicitly forbids a leftover tensor.pad in the
// output; only the operands of the transfer reads are pinned.
//
// #map0 lists two result expressions, (12 - s0) and 7; it is used with
// affine.min below. #map1 computes (7 - s0).
127#map0 = affine_map<()[s0] -> (-s0 + 12, 7)>
128#map1 = affine_map<()[s0] -> (-s0 + 7)>
129
130// CHECK-LABEL: @vectorize_pad
131// CHECK-SAME: %[[A:.+]]: tensor<24x12xf32>
132// CHECK-SAME: %[[B:.+]]: tensor<12x25xf32>
133// CHECK-SAME: %[[C:.+]]: tensor<24x25xf32>
134func.func @vectorize_pad(
135    %arg0: tensor<24x12xf32>, %arg1: tensor<12x25xf32>,
136    %arg2: tensor<24x25xf32>, %arg3: index, %arg4: index,
137    %arg5: index) -> tensor<24x25xf32> {
138  %c0 = arith.constant 0 : index
139  %cst = arith.constant 0.000000e+00 : f32
  // %0 = min(12 - %arg5, 7): the dynamic extent of both input slices.
140  %0 = affine.min #map0()[%arg5]
141  // CHECK: %[[sA:.+]] = tensor.extract_slice %[[A]]
142  // CHECK: %[[sB:.+]] = tensor.extract_slice %[[B]]
143  %1 = tensor.extract_slice %arg0[%arg3, %arg5] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
144  %2 = tensor.extract_slice %arg1[%arg5, %arg4] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
145  %3 = tensor.extract_slice %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
146  // CHECK: %[[vA:.+]] = vector.transfer_read %[[sA]]
  // %4 = 7 - %0: high padding that brings the dynamic dimension up to 7.
147  %4 = affine.apply #map1()[%0]
148  %5 = tensor.pad %1 nofold low[%c0, %c0] high[%c0, %4] {
149  ^bb0(%arg6: index, %arg7: index):
150    tensor.yield %cst : f32
151  } : tensor<4x?xf32> to tensor<4x7xf32>
  // %6 is the same amount as %4, applied to the first dimension of %2.
152  %6 = affine.apply #map1()[%0]
153  // CHECK: %[[vB:.+]] = vector.transfer_read %[[sB]]
154  %7 = tensor.pad %2 nofold low[%c0, %c0] high[%6, %c0] {
155  ^bb0(%arg6: index, %arg7: index):
156    tensor.yield %cst : f32
157  } : tensor<?x5xf32> to tensor<7x5xf32>
158  // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]]
159  // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]]
160  // CHECK: vector.transfer_write %[[vR]], %[[C]]
161  %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
162  %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
163  return %9 : tensor<24x25xf32>
164}
165
// Transform script for @vectorize_pad. It differs from the other scripts in
// this file only in the {vectorize_padding} attribute on the vectorization op.
166module attributes {transform.with_named_sequence} {
167  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
168    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
169    %1 = transform.get_parent_op %0 {isolated_from_above} : (!transform.any_op) -> !transform.any_op
    // vectorize_padding is the option under test in this case.
170    %2 = transform.structured.vectorize_children_and_apply_patterns %1 {vectorize_padding} : (!transform.any_op) -> !transform.any_op
171    transform.yield
172  }
173}
174
175// -----
176
// Negative test payload: this function has no FileCheck label; it is checked
// through -verify-diagnostics (see the RUN line). The note annotation inside
// the body must stay on the line directly above the linalg.matmul, because it
// uses the @below form.
177func.func @vectorize(%arg0: tensor<24x12xf32>,
178                     %arg1: tensor<12x25xf32>,
179                     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
180  // expected-note @below {{non-isolated target}}
181  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
182  func.return %0 : tensor<24x25xf32>
183}
184
// Negative test script for @vectorize: the matched linalg.matmul handle is
// passed straight to the vectorization op, with no transform.get_parent_op
// step in between. The error annotation expects the op to reject the target
// as not isolated from above. That annotation uses the @below form and must
// stay directly above the vectorization op.
185module attributes {transform.with_named_sequence} {
186  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
187    %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!transform.any_op) -> !transform.any_op
188    // expected-error @below {{op requires isolated-from-above targets}}
189    %2 = transform.structured.vectorize_children_and_apply_patterns %0 : (!transform.any_op) -> !transform.any_op
190    transform.yield
191  }
192}
193