// RUN: mlir-opt %s -test-vector-transfer-unrolling-patterns --split-input-file | FileCheck %s --check-prefixes=ALL,CHECK
// RUN: mlir-opt %s -test-vector-transfer-unrolling-patterns=reverse-unroll-order --split-input-file | FileCheck %s --check-prefixes=ALL,ORDER

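// Unroll a 4x4 transfer_read of a memref into four 2x2 reads that are
// reassembled with vector.insert_strided_slice. The ORDER prefixes check the
// reverse-unroll-order run, where the leading tile dimension varies fastest.
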
// ALL-LABEL:   func @transfer_read_unroll
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    return %[[VEC3]] : vector<4x4xf32>

// ORDER-DAG:     %[[C2:.*]] = arith.constant 2 : index
// ORDER-DAG:     %[[C0:.*]] = arith.constant 0 : index
// ORDER:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// ORDER-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// ORDER-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// ORDER-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// ORDER-NEXT:    return %[[VEC3]] : vector<4x4xf32>

func.func @transfer_read_unroll(%mem : memref<4x4xf32>) -> vector<4x4xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  return %res : vector<4x4xf32>
}

// -----

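// Unroll a 4x4 transfer_write into four extract_strided_slice + 2x2
// transfer_write pairs; the ORDER run again visits the tile offsets with the
// leading dimension varying fastest.
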
// ALL-LABEL:   func @transfer_write_unroll
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[S0:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    vector.transfer_write %[[S0]], {{.*}}[%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    vector.transfer_write %[[S1]], {{.*}}[%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    vector.transfer_write %[[S2]], {{.*}}[%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    vector.transfer_write %[[S3]], {{.*}}[%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    return

// ORDER-DAG:     %[[C2:.*]] = arith.constant 2 : index
// ORDER-DAG:     %[[C0:.*]] = arith.constant 0 : index
// ORDER:         %[[S0:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    vector.transfer_write %[[S0]], {{.*}}[%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// ORDER-NEXT:    %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    vector.transfer_write %[[S1]], {{.*}}[%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// ORDER-NEXT:    %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    vector.transfer_write %[[S2]], {{.*}}[%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// ORDER-NEXT:    %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    vector.transfer_write %[[S3]], {{.*}}[%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// ORDER-NEXT:    return

func.func @transfer_write_unroll(%mem : memref<4x4xf32>, %vec : vector<4x4xf32>) {
  %c0 = arith.constant 0 : index
  vector.transfer_write %vec, %mem[%c0, %c0] : vector<4x4xf32>, memref<4x4xf32>
  return
}

// -----

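// A transfer_read followed by a transfer_write of the same 4x4 value: the four
// unrolled 2x2 reads feed the four 2x2 writes directly, with no intermediate
// insert/extract of the full vector.
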
// CHECK-LABEL: func @transfer_readwrite_unroll
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    vector.transfer_write %[[VTR0]], {{.*}}[%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    vector.transfer_write %[[VTR1]], {{.*}}[%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    vector.transfer_write %[[VTR2]], {{.*}}[%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    vector.transfer_write %[[VTR3]], {{.*}}[%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, memref<4x4xf32>
// CHECK-NEXT:    return

func.func @transfer_readwrite_unroll(%mem : memref<4x4xf32>) {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %mem[%c0, %c0], %cf0 : memref<4x4xf32>, vector<4x4xf32>
  vector.transfer_write %0, %mem[%c0, %c0] : vector<4x4xf32>, memref<4x4xf32>
  return
}

// -----

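// Same as @transfer_read_unroll, but reading from a tensor instead of a memref.
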
// CHECK-LABEL: func @transfer_read_unroll_tensor
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x4xf32>
// CHECK-NEXT:    return %[[VEC3]] : vector<4x4xf32>

func.func @transfer_read_unroll_tensor(%arg0 : tensor<4x4xf32>) -> vector<4x4xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %arg0[%c0, %c0], %cf0 : tensor<4x4xf32>, vector<4x4xf32>
  return %res : vector<4x4xf32>
}

// -----

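// Same as @transfer_write_unroll, but writing into a tensor: each unrolled
// transfer_write returns an updated tensor that is threaded into the next write.
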
// CHECK-LABEL: func @transfer_write_unroll_tensor
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[S0:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VTW0:.*]] = vector.transfer_write %[[S0]], {{.*}}[%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    %[[S1:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VTW1:.*]] = vector.transfer_write %[[S1]], %[[VTW0]][%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    %[[S2:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VTW2:.*]] = vector.transfer_write %[[S2]], %[[VTW1]][%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    %[[S3:.*]] = vector.extract_strided_slice %{{.*}} {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<4x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VTW3:.*]] = vector.transfer_write %[[S3]], %[[VTW2]][%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    return %[[VTW3]] : tensor<4x4xf32>

func.func @transfer_write_unroll_tensor(%arg0 : tensor<4x4xf32>,
  %vec : vector<4x4xf32>) -> tensor<4x4xf32> {
  %c0 = arith.constant 0 : index
  %res = vector.transfer_write %vec, %arg0[%c0, %c0] :
    vector<4x4xf32>, tensor<4x4xf32>
  return %res: tensor<4x4xf32>
}

// -----

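// Read/write unrolling on tensors: the 2x2 reads feed the 2x2 writes directly,
// and the written tensors are chained through to the returned value.
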
// CHECK-LABEL: func @transfer_readwrite_unroll_tensor
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C2]]], %{{.*}} : tensor<4x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VTW0:.*]] = vector.transfer_write %[[VTR0]], {{.*}}[%[[C0]], %[[C0]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    %[[VTW1:.*]] = vector.transfer_write %[[VTR1]], %[[VTW0]][%[[C0]], %[[C2]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    %[[VTW2:.*]] = vector.transfer_write %[[VTR2]], %[[VTW1]][%[[C2]], %[[C0]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    %[[VTW3:.*]] = vector.transfer_write %[[VTR3]], %[[VTW2]][%[[C2]], %[[C2]]] {{.*}} : vector<2x2xf32>, tensor<4x4xf32>
// CHECK-NEXT:    return %[[VTW3]] : tensor<4x4xf32>

func.func @transfer_readwrite_unroll_tensor(%arg0 : tensor<4x4xf32>, %arg1 : tensor<4x4xf32>) ->
  tensor<4x4xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %0 = vector.transfer_read %arg0[%c0, %c0], %cf0 : tensor<4x4xf32>, vector<4x4xf32>
  %res = vector.transfer_write %0, %arg1[%c0, %c0] : vector<4x4xf32>, tensor<4x4xf32>
  return %res: tensor<4x4xf32>
}

// -----

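// transfer_read with a transposing permutation_map ((d0, d1) -> (d1, d0)):
// the unrolled 2x2 reads use transposed memref indices while the insert
// offsets follow the 4x6 result layout.
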
// CHECK-LABEL: func @transfer_read_unroll_permutation
// CHECK-DAG:     %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C4]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [0, 4], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR4:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C2]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC4:.*]] = vector.insert_strided_slice %[[VTR4]], %[[VEC3]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR5:.*]] = vector.transfer_read {{.*}}[%[[C4]], %[[C2]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC5:.*]] = vector.insert_strided_slice %[[VTR5]], %[[VEC4]] {offsets = [2, 4], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    return %[[VEC5]] : vector<4x6xf32>
#map0 = affine_map<(d0, d1) -> (d1, d0)>
func.func @transfer_read_unroll_permutation(%mem : memref<6x4xf32>) -> vector<4x6xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%c0, %c0], %cf0 {permutation_map = #map0} : memref<6x4xf32>, vector<4x6xf32>
  return %res : vector<4x6xf32>
}

// -----

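// transfer_read with a broadcast along the leading result dimension
// ((d0, d1) -> (0, d1)): every unrolled read uses %c0 for the row index and
// only the column index varies.
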
// CHECK-LABEL: func @transfer_read_unroll_broadcast
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR4:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC4:.*]] = vector.insert_strided_slice %[[VTR4]], %[[VEC3]] {offsets = [4, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR5:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C2]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC5:.*]] = vector.insert_strided_slice %[[VTR5]], %[[VEC4]] {offsets = [4, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    return %[[VEC5]] : vector<6x4xf32>
#map0 = affine_map<(d0, d1) -> (0, d1)>
func.func @transfer_read_unroll_broadcast(%mem : memref<6x4xf32>) -> vector<6x4xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%c0, %c0], %cf0 {permutation_map = #map0} : memref<6x4xf32>, vector<6x4xf32>
  return %res : vector<6x4xf32>
}

// -----

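// Combines a broadcast with a permutation ((d0, d1) -> (0, d0)): the result's
// leading dimension is broadcast, so only the first memref index advances,
// following the trailing dimension of the 4x6 result.
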
// CHECK-LABEL: func @transfer_read_unroll_broadcast_permutation
// CHECK-DAG:     %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C4]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [0, 4], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR4:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC4:.*]] = vector.insert_strided_slice %[[VTR4]], %[[VEC3]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    %[[VTR5:.*]] = vector.transfer_read {{.*}}[%[[C4]], %[[C0]]], %{{.*}} : memref<6x4xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC5:.*]] = vector.insert_strided_slice %[[VTR5]], %[[VEC4]] {offsets = [2, 4], strides = [1, 1]} : vector<2x2xf32> into vector<4x6xf32>
// CHECK-NEXT:    return %[[VEC5]] : vector<4x6xf32>
#map0 = affine_map<(d0, d1) -> (0, d0)>
func.func @transfer_read_unroll_broadcast_permutation(%mem : memref<6x4xf32>) -> vector<4x6xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%c0, %c0], %cf0 {permutation_map = #map0} : memref<6x4xf32>, vector<4x6xf32>
  return %res : vector<4x6xf32>
}

// -----

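// transfer_read whose permutation_map ((d0, d1, d2) -> (d2, d0)) has a
// different rank than the result: a 2-D vector<6x4> is read from a 3-D memref,
// so each unrolled read offsets the d0 and d2 indices while d1 stays at %c0.
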
// ALL-LABEL:   func @transfer_read_unroll_different_rank
// CHECK-DAG:     %[[C4:.*]] = arith.constant 4 : index
// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]], %[[C0]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]], %[[C2]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]], %[[C2]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR4:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]], %[[C4]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC4:.*]] = vector.insert_strided_slice %[[VTR4]], %[[VEC3]] {offsets = [4, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[VTR5:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]], %[[C4]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// CHECK-NEXT:    %[[VEC5:.*]] = vector.insert_strided_slice %[[VTR5]], %[[VEC4]] {offsets = [4, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    return %[[VEC5]] : vector<6x4xf32>

// ORDER-DAG:     %[[C4:.*]] = arith.constant 4 : index
// ORDER-DAG:     %[[C2:.*]] = arith.constant 2 : index
// ORDER-DAG:     %[[C0:.*]] = arith.constant 0 : index
// ORDER:         %[[VTR0:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]], %[[C0]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VTR0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[VTR1:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]], %[[C2]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VTR1]], %[[VEC0]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[VTR2:.*]] = vector.transfer_read {{.*}}[%[[C0]], %[[C0]], %[[C4]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VTR2]], %[[VEC1]] {offsets = [4, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[VTR3:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]], %[[C0]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VTR3]], %[[VEC2]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[VTR4:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]], %[[C2]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC4:.*]] = vector.insert_strided_slice %[[VTR4]], %[[VEC3]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[VTR5:.*]] = vector.transfer_read {{.*}}[%[[C2]], %[[C0]], %[[C4]]], %{{.*}} : memref<?x?x?xf32>, vector<2x2xf32>
// ORDER-NEXT:    %[[VEC5:.*]] = vector.insert_strided_slice %[[VTR5]], %[[VEC4]] {offsets = [4, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    return %[[VEC5]] : vector<6x4xf32>

#map0 = affine_map<(d0, d1, d2) -> (d2, d0)>
func.func @transfer_read_unroll_different_rank(%mem : memref<?x?x?xf32>) -> vector<6x4xf32> {
  %c0 = arith.constant 0 : index
  %cf0 = arith.constant 0.0 : f32
  %res = vector.transfer_read %mem[%c0, %c0, %c0], %cf0 {permutation_map = #map0} : memref<?x?x?xf32>, vector<6x4xf32>
  return %res : vector<6x4xf32>
}

// -----

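// Unroll a 6x4 vector.gather into six 2x2 gathers: the index, mask, and
// pass-through operands are sliced with extract_strided_slice and the partial
// results are reassembled with insert_strided_slice. ORDER checks the
// reverse-unroll-order traversal.
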
// ALL-LABEL:   func @vector_gather_unroll
// ALL-SAME:      %[[MEM:.*]]: memref<?x?x?xf32>
// ALL-SAME:      %[[INDICES:.*]]: vector<6x4xindex>
// ALL-SAME:      %[[MASK:.*]]: vector<6x4xi1>
// ALL-SAME:      %[[PT:.*]]: vector<6x4xf32>

// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
// CHECK:         %[[IDX0:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// CHECK-NEXT:    %[[MASK0:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// CHECK-NEXT:    %[[PASS0:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VGT0:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX0]]], %[[MASK0]], %[[PASS0]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VGT0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[IDX1:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// CHECK-NEXT:    %[[MASK1:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// CHECK-NEXT:    %[[PASS1:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VGT1:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX1]]], %[[MASK1]], %[[PASS1]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VGT1]], %[[VEC0]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[IDX2:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// CHECK-NEXT:    %[[MASK2:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// CHECK-NEXT:    %[[PASS2:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VGT2:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX2]]], %[[MASK2]], %[[PASS2]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VGT2]], %[[VEC1]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[IDX3:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// CHECK-NEXT:    %[[MASK3:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// CHECK-NEXT:    %[[PASS3:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VGT3:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX3]]], %[[MASK3]], %[[PASS3]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VGT3]], %[[VEC2]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[IDX4:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// CHECK-NEXT:    %[[MASK4:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// CHECK-NEXT:    %[[PASS4:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VGT4:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX4]]], %[[MASK4]], %[[PASS4]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT:    %[[VEC4:.*]] = vector.insert_strided_slice %[[VGT4]], %[[VEC3]] {offsets = [4, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    %[[IDX5:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// CHECK-NEXT:    %[[MASK5:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// CHECK-NEXT:    %[[PASS5:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// CHECK-NEXT:    %[[VGT5:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX5]]], %[[MASK5]], %[[PASS5]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// CHECK-NEXT:    %[[VEC5:.*]] = vector.insert_strided_slice %[[VGT5]], %[[VEC4]] {offsets = [4, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// CHECK-NEXT:    return %[[VEC5]] : vector<6x4xf32>

// ORDER-DAG:     %[[C0:.*]] = arith.constant 0 : index
// ORDER:         %[[IDX0:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// ORDER-NEXT:    %[[MASK0:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// ORDER-NEXT:    %[[PASS0:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [0, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    %[[VGT0:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX0]]], %[[MASK0]], %[[PASS0]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// ORDER-NEXT:    %[[VEC0:.*]] = vector.insert_strided_slice %[[VGT0]], %{{.*}} {offsets = [0, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[IDX1:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// ORDER-NEXT:    %[[MASK1:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// ORDER-NEXT:    %[[PASS1:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [2, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    %[[VGT1:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX1]]], %[[MASK1]], %[[PASS1]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// ORDER-NEXT:    %[[VEC1:.*]] = vector.insert_strided_slice %[[VGT1]], %[[VEC0]] {offsets = [2, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[IDX2:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// ORDER-NEXT:    %[[MASK2:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// ORDER-NEXT:    %[[PASS2:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [4, 0], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    %[[VGT2:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX2]]], %[[MASK2]], %[[PASS2]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// ORDER-NEXT:    %[[VEC2:.*]] = vector.insert_strided_slice %[[VGT2]], %[[VEC1]] {offsets = [4, 0], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[IDX3:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// ORDER-NEXT:    %[[MASK3:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// ORDER-NEXT:    %[[PASS3:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [0, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    %[[VGT3:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX3]]], %[[MASK3]], %[[PASS3]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// ORDER-NEXT:    %[[VEC3:.*]] = vector.insert_strided_slice %[[VGT3]], %[[VEC2]] {offsets = [0, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[IDX4:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// ORDER-NEXT:    %[[MASK4:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// ORDER-NEXT:    %[[PASS4:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [2, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    %[[VGT4:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX4]]], %[[MASK4]], %[[PASS4]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// ORDER-NEXT:    %[[VEC4:.*]] = vector.insert_strided_slice %[[VGT4]], %[[VEC3]] {offsets = [2, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    %[[IDX5:.*]] = vector.extract_strided_slice %[[INDICES]] {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xindex> to vector<2x2xindex>
// ORDER-NEXT:    %[[MASK5:.*]] = vector.extract_strided_slice %[[MASK]] {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xi1> to vector<2x2xi1>
// ORDER-NEXT:    %[[PASS5:.*]] = vector.extract_strided_slice %[[PT]] {offsets = [4, 2], sizes = [2, 2], strides = [1, 1]} : vector<6x4xf32> to vector<2x2xf32>
// ORDER-NEXT:    %[[VGT5:.*]] = vector.gather {{.*}}[%[[C0]], %[[C0]], %[[C0]]] [%[[IDX5]]], %[[MASK5]], %[[PASS5]] : memref<?x?x?xf32>, vector<2x2xindex>, vector<2x2xi1>, vector<2x2xf32> into vector<2x2xf32>
// ORDER-NEXT:    %[[VEC5:.*]] = vector.insert_strided_slice %[[VGT5]], %[[VEC4]] {offsets = [4, 2], strides = [1, 1]} : vector<2x2xf32> into vector<6x4xf32>
// ORDER-NEXT:    return %[[VEC5]] : vector<6x4xf32>

func.func @vector_gather_unroll(%mem : memref<?x?x?xf32>,
                                %indices : vector<6x4xindex>,
                                %mask : vector<6x4xi1>,
                                %pass_thru : vector<6x4xf32>) -> vector<6x4xf32> {
  %c0 = arith.constant 0 : index
  %res = vector.gather %mem[%c0, %c0, %c0] [%indices], %mask, %pass_thru : memref<?x?x?xf32>, vector<6x4xindex>, vector<6x4xi1>, vector<6x4xf32> into vector<6x4xf32>
  return %res : vector<6x4xf32>
}