Lines Matching full:affine

1 // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=PRODUCER-CONSUMER
2 // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{fusion-maximal mode=sibling}))' -split-input-file | FileCheck %s --check-prefix=SIBLING-MAXIMAL
3 // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(spirv.func(affine-loop-fusion{mode=producer}))' -split-input-file | FileCheck %s --check-prefix=SPIRV
16 affine.for %i0 = 0 to 7 {
17 affine.for %i1 = 0 to 8 {
18 affine.for %i2 = 0 to 9 {
19 affine.for %i3 = 0 to 10 {
20 affine.store %cf7, %m[720 * %i0 + 90 * %i1 + 10 * %i2 + %i3] : memref<5040xf32>
25 affine.for %i0 = 0 to 7 {
26 affine.for %i1 = 0 to 8 {
27 affine.for %i2 = 0 to 9 {
28 affine.for %i3 = 0 to 10 {
29 %v0 = affine.load %m[720 * %i0 + 90 * %i1 + 10 * %i2 + %i3] : memref<5040xf32>
30 affine.store %v0, %arg1[%i0, %i1, %i2, %i3] : memref<7x8x9x10xf32>
38 // PRODUCER-CONSUMER: affine.for
39 // PRODUCER-CONSUMER-NEXT: affine.for
40 // PRODUCER-CONSUMER-NEXT: affine.for
41 // PRODUCER-CONSUMER-NEXT: affine.for
42 // PRODUCER-CONSUMER-NOT: affine.for
54 affine.for %i0 = 0 to 7 {
55 affine.for %i1 = 0 to 8 {
56 affine.store %cf7, %m[8 * %i0 + %i1] : memref<56xf32>
59 affine.for %i0 = 0 to 8 {
60 affine.for %i1 = 0 to 7 {
61 %v0 = affine.load %m[%i0 + 8 * %i1] : memref<56xf32>
62 affine.store %v0, %arg1[%i0, %i1] : memref<8x7xf32>
68 // PRODUCER-CONSUMER: affine.for
69 // PRODUCER-CONSUMER-NEXT: affine.for
70 // PRODUCER-CONSUMER-NOT: affine.for
81 affine.for %i0 = 0 to 100 {
82 %r1 = affine.load %src[%i0]: memref<100xf32>
83 affine.store %r1, %m[%i0] : memref<100xf32>
85 affine.for %i2 = 0 to 100 step 2 {
86 %r2 = affine.load %m[%i2] : memref<100xf32>
87 affine.store %r2, %out[%i2] : memref<100xf32>
96 // PRODUCER-CONSUMER: affine.for %[[idx_0:.*]] = 0 to 100 {
97 // PRODUCER-CONSUMER-NEXT: %[[result_0:.*]] = affine.load %[[arr1:.*]][%[[idx_0]]] : memref<100xf32>
98 // PRODUCER-CONSUMER-NEXT: affine.store %[[result_0]], %{{.*}}[%[[idx_0]]] : memref<100xf32>
100 // PRODUCER-CONSUMER: affine.for %[[idx_1:.*]] = 0 to 100 step 2 {
101 // PRODUCER-CONSUMER: affine.load %[[arr1]][%[[idx_1]]] : memref<100xf32>
111 affine.for %arg3 = 0 to 1 {
112 affine.for %arg4 = 0 to 64 {
113 %accum = affine.for %arg5 = 0 to 64 iter_args (%prevAccum = %cst_0) -> f32 {
114 %4 = affine.load %arg0[%arg5, %arg4] : memref<64x64xf32, 1>
116 affine.yield %5 : f32
119 affine.store %accum_dbl, %arg1[%arg3, %arg4] : memref<1x64xf32, 1>
122 affine.for %arg3 = 0 to 1 {
123 affine.for %arg4 = 0 to 64 {
125 %accum = affine.for %arg5 = 0 to 32 iter_args (%prevAccum = %cst_1) -> f32 {
126 %4 = affine.load %arg0[%arg5, %arg4] : memref<64x64xf32, 1>
128 affine.yield %5 : f32
131 affine.store %accum_sqr, %arg2[%arg3, %arg4] : memref<1x64xf32, 1>
141 // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_0:.*]] = 0 to 1 {
142 // SIBLING-MAXIMAL-NEXT: affine.for %[[idx_1:.*]] = 0 to 64 {
143 // SIBLING-MAXIMAL-NEXT: %[[result_1:.*]] = affine.for %[[idx_2:.*]] = 0 to 32 iter_args(%[[iter_0:.*]] = %[[cst_1]]) -> (f32) {
144 // SIBLING-MAXIMAL-NEXT: %[[result_0:.*]] = affine.for %[[idx_3:.*]] = 0 to 64 iter_args(%[[iter_1:.*]] = %[[cst_0]]) -> (f32) {
150 affine.for %arg1 = 0 to 10 {
151 %0 = affine.load %arg0[%arg1] : memref<10xf32>
153 affine.for %arg1 = 0 to 10 {
154 %0 = affine.load %arg0[%arg1] : memref<10xf32>
156 // SIBLING-MAXIMAL-NEXT: affine.for
157 // SIBLING-MAXIMAL-NEXT: affine.load
158 // SIBLING-MAXIMAL-NEXT: affine.load
170 affine.for %i0 = 0 to 10 {
171 affine.store %cf7, %m[%i0] : memref<10xf32>
173 affine.for %i1 = 0 to 10 {
174 %v0 = affine.load %m[%i1] : memref<10xf32>
176 // PRODUCER-CONSUMER: affine.for %{{.*}} = 0 to 10 {
177 // PRODUCER-CONSUMER-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
178 // PRODUCER-CONSUMER-NEXT: affine.load %{{.*}}[0] : memref<1xf32>
182 affine.for %i0 = 0 to 10 {
183 affine.store %cf7, %m[%i0] : memref<10xf32>
185 affine.for %i1 = 0 to 10 {
186 %v0 = affine.load %m[%i1] : memref<10xf32>
188 // PRODUCER-CONSUMER: affine.for %{{.*}} = 0 to 10 {
189 // PRODUCER-CONSUMER-NEXT: affine.store %{{.*}}, %{{.*}}[0] : memref<1xf32>
190 // PRODUCER-CONSUMER-NEXT: affine.load %{{.*}}[0] : memref<1xf32>
201 affine.for %arg205 = 0 to 8 {
202 affine.for %arg206 = 0 to 128 {
203 affine.for %arg207 = 0 to 12 {
204 affine.for %arg208 = 0 to 64 {
205 %a = affine.load %A[%arg205, %arg207, %arg206, %arg208] : memref<8x12x128x64xf32>
206 affine.store %a, %B[%arg205, %arg206, %arg207, %arg208] : memref<8x128x12x64xf32>
212 affine.for %arg205 = 0 to 8 {
213 affine.for %arg206 = 0 to 128 {
214 affine.for %arg207 = 0 to 768 {
215 %b = affine.load %B[%arg205, %arg206, %arg207 floordiv 64, %arg207 mod 64] : memref<8x128x12x64xf32>
217 affine.store %c, %C[%arg205, %arg206, %arg207] : memref<8x128x768xf16>
223 // PRODUCER-CONSUMER: affine.for
224 // PRODUCER-CONSUMER-NEXT: affine.for %{{.*}} = 0 to 128
225 // PRODUCER-CONSUMER-NEXT: affine.for %{{.*}} = 0 to 768
226 // PRODUCER-CONSUMER-NOT: affine.for
247 affine.for %arg1 = 0 to 1 {
248 affine.for %arg2 = 0 to 32 {
249 affine.for %arg3 = 0 to 32 {
250 affine.for %arg4 = 0 to 8 {
251 affine.for %arg5 = 0 to 1 {
252 affine.for %arg6 = 0 to 1 {
253 %4 = affine.apply #map(%arg2, %arg5)
254 %5 = affine.apply #map(%arg3, %arg6)
255 %6 = affine.load %1[%arg1, %4, %5, %arg4] : memref<1x40x8229x8xf32>
256 %7 = affine.load %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32>
258 affine.store %8, %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32>
266 affine.for %arg1 = 0 to 1 {
267 affine.for %arg2 = 0 to 32 {
268 affine.for %arg3 = 0 to 32 {
269 affine.for %arg4 = 0 to 8 {
270 %4 = affine.load %alloc_0[%arg1, %arg2, %arg3, %arg4] : memref<1x32x32x8xf32>
276 // SPIRV: affine.for %{{.*}} = 0 to 1 {
277 // SPIRV-NEXT: affine.for %{{.*}} = 0 to 32 {
278 // SPIRV-NEXT: affine.for %{{.*}} = 0 to 32 {
279 // SPIRV-NEXT: affine.for %{{.*}} = 0 to 8 {
280 // SPIRV-NOT: affine.for %{{.*}}