// xref: /llvm-project/mlir/test/Dialect/Affine/SuperVectorize/vectorize_outer_loop_transpose_2d.mlir (revision 227ed2f448e26cfc646e30ac17b03eac9578c297)
// RUN: mlir-opt %s -affine-super-vectorize="virtual-vector-size=32,256 test-fastest-varying=0,2" | FileCheck %s

// Permutation maps used in vectorization.
// CHECK: #[[map_proj_d0d1d2_d2d0:map[0-9]*]] = affine_map<(d0, d1, d2) -> (d2, d0)>

// Two perfectly nested 3-deep loop nests that load from the same memref with
// different index orders. With the pass options used by this test
// (virtual-vector-size=32,256 test-fastest-varying=0,2) only the second nest
// is expected to be vectorized.
func.func @vec2d(%A : memref<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  // Dynamic extents of %A, used as the loop upper bounds below.
  %M = memref.dim %A, %c0 : memref<?x?x?xf32>
  %N = memref.dim %A, %c1 : memref<?x?x?xf32>
  %P = memref.dim %A, %c2 : memref<?x?x?xf32>

  // For the case test-fastest-varying=0,2 no vectorization happens in this
  // nest because of the loop nesting order. The negative check below enforces
  // that: no vector read may show up between these loops and the start of the
  // next (vectorized) nest.
  // CHECK: for  {{.*}} = 0 to %{{.*}} {
  // CHECK:   for  {{.*}} = 0 to %{{.*}} {
  // CHECK:     for  {{.*}} = 0 to %{{.*}} {
  // CHECK-NOT: vector.transfer_read
  affine.for %i0 = 0 to %M {
    affine.for %i1 = 0 to %N {
      affine.for %i2 = 0 to %P {
        %a2 = affine.load %A[%i0, %i1, %i2] : memref<?x?x?xf32>
      }
    }
  }

  // Here the outermost loop (%i3) indexes the last memref dimension and the
  // middle loop (%i4) indexes the first one. The two outer loops are expected
  // to get steps 32 and 256, and the load is expected to become a 2-D
  // transfer_read using the (d0, d1, d2) -> (d2, d0) permutation map.
  // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32
  // CHECK:   affine.for %{{.*}} = 0 to %{{.*}} step 256
  // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} {
  // CHECK:       %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
  affine.for %i3 = 0 to %M {
    affine.for %i4 = 0 to %N {
      affine.for %i5 = 0 to %P {
        %a5 = affine.load %A[%i4, %i5, %i3] : memref<?x?x?xf32>
      }
    }
  }
  return
}

// Imperfectly nested variant: one outer loop (%i0) contains two sibling
// sub-nests, and the second sub-nest itself contains two sibling innermost
// loops. Every load uses %i0 as its last index, and all three loads are
// expected to be rewritten to 32x256 transposing vector reads.
func.func @vec2d_imperfectly_nested(%A : memref<?x?x?xf32>) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  // Dynamic extents of %A, used as the loop upper bounds below.
  %0 = memref.dim %A, %c0 : memref<?x?x?xf32>
  %1 = memref.dim %A, %c1 : memref<?x?x?xf32>
  %2 = memref.dim %A, %c2 : memref<?x?x?xf32>

  // Expected output shape:
  //  - the shared outer loop gets step 32;
  //  - in the first sub-nest the innermost loop (%i2, first load index) gets
  //    step 256 while the middle loop keeps its unit step;
  //  - in the second sub-nest the %i3 loop (first load index) gets step 256
  //    and both innermost sibling loops keep their unit step.
  // CHECK: affine.for %{{.*}} = 0 to %{{.*}} step 32 {
  // CHECK:   affine.for %{{.*}} = 0 to %{{.*}} {
  // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} step 256 {
  // CHECK:       %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
  // CHECK:   affine.for %{{.*}} = 0 to %{{.*}} step 256 {
  // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} {
  // CHECK:       %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
  // CHECK:     affine.for %{{.*}} = 0 to %{{.*}} {
  // CHECK:       %{{.*}} = vector.transfer_read %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}], %{{.*}} {permutation_map = #[[map_proj_d0d1d2_d2d0]]} : memref<?x?x?xf32>, vector<32x256xf32>
  affine.for %i0 = 0 to %0 {
    // First sub-nest: a perfect 2-deep nest under %i0.
    affine.for %i1 = 0 to %1 {
      affine.for %i2 = 0 to %2 {
        %a2 = affine.load %A[%i2, %i1, %i0] : memref<?x?x?xf32>
      }
    }
    // Second sub-nest: %i3 holds two sibling innermost loops.
    affine.for %i3 = 0 to %1 {
      affine.for %i4 = 0 to %2 {
        %a4 = affine.load %A[%i3, %i4, %i0] : memref<?x?x?xf32>
      }
      affine.for %i5 = 0 to %2 {
        %a5 = affine.load %A[%i3, %i5, %i0] : memref<?x?x?xf32>
      }
    }
  }
  return
}
