// RUN: mlir-opt -fold-tensor-subset-ops -split-input-file --allow-unregistered-dialect %s | FileCheck %s

// Rank-reducing extract_slice feeding a 1-D transfer_read. The expected output
// reads directly from %arg0: a constant 0 index for the dropped unit dimension
// and (slice offset + read index) for the two remaining dimensions.
func.func @fold_vector_transfer_read_with_rank_reduced_extract_slice(
    %arg0 : tensor<?x?x?xf32>,
    %arg1: index, %arg2 : index, %arg3 : index, %arg4: index, %arg5 : index,
    %arg6 : index) -> vector<4xf32> {
  %cst = arith.constant 0.0 : f32
  %0 = tensor.extract_slice %arg0[0, %arg1, %arg2] [1, %arg3, %arg4] [1, 1, 1]
    : tensor<?x?x?xf32> to
      tensor<?x?xf32>
  %1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = [true]}
    : tensor<?x?xf32>, vector<4xf32>
  return %1 : vector<4xf32>
}
//   CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)>
//       CHECK: func @fold_vector_transfer_read_with_rank_reduced_extract_slice
//  CHECK-SAME:   %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?xf32>
//  CHECK-SAME:   %[[ARG1:[a-zA-Z0-9]+]]: index
//  CHECK-SAME:   %[[ARG2:[a-zA-Z0-9]+]]: index
//  CHECK-SAME:   %[[ARG3:[a-zA-Z0-9]+]]: index
//  CHECK-SAME:   %[[ARG4:[a-zA-Z0-9]+]]: index
//  CHECK-SAME:   %[[ARG5:[a-zA-Z0-9]+]]: index
//  CHECK-SAME:   %[[ARG6:[a-zA-Z0-9]+]]: index
//   CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//   CHECK-DAG:   %[[IDX0:.+]] = affine.apply #[[$MAP1]]()[%[[ARG1]], %[[ARG5]]]
//   CHECK-DAG:   %[[IDX1:.+]] = affine.apply #[[$MAP1]]()[%[[ARG2]], %[[ARG6]]]
//       CHECK:   vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]], %{{.*}} : tensor<?x?x?xf32>

// -----

// Negative test: the extract_slice uses non-unit strides, and the checks expect
// it to survive the pass.
// CHECK-LABEL: func.func @transfer_read_from_rank_reducing_extract_slice_failure
func.func @transfer_read_from_rank_reducing_extract_slice_failure(
    %src: tensor<1x8x8x8xf32>,
    %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> {
  %c1 = arith.constant 1 : index
  %c2 = arith.constant 2 : index
  %f0 = arith.constant 0.000000e+00 : f32

  // Can't fold this atm since we don't emit the proper vector.extract_strided_slice.
//   CHECK: tensor.extract_slice
  %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [2, 3, 4, 5] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32>
  %1 = vector.transfer_read %0[%c1, %i4, %c2], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32>
  return %1 : vector<4xf32>
}

// -----

// 2-D transfer_read of a non-rank-reducing slice: slice offsets [5, %s1] plus
// read indices [3, 4] are expected to become [8, %s1 + 4] on the source tensor.
//   CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)>

// CHECK-LABEL: func @transfer_read_of_extract_slice(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
//   CHECK-DAG:   %[[c8:.*]] = arith.constant 8 : index
//       CHECK:   %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s1]]]
//       CHECK:   %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true, true]} : tensor<?x?xf32>, vector<5x6xf32>
//       CHECK:   return %[[r]]
func.func @transfer_read_of_extract_slice(%t : tensor<?x?xf32>, %s1 : index, %s2 : index) -> vector<5x6xf32> {
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index
  %cst = arith.constant 0.0 : f32
  %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor<?x?xf32> to tensor<10x?xf32>
  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<10x?xf32>, vector<5x6xf32>
  return %1 : vector<5x6xf32>
}

// -----

// 0-d transfer_read of a slice reduced to tensor<f32>: the expected read uses
// the two slice offsets directly as indices into the source tensor.
func.func @fold_extract_slice_with_transfer_read_0d(
  %arg0 : tensor<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index)
    -> vector<f32> {
  %f1 = arith.constant 1.0 : f32
  %0 = tensor.extract_slice %arg0[%arg1, %arg2][1, 1][1, 1] : tensor<12x32xf32> to tensor<f32>
  %1 = vector.transfer_read %0[], %f1 : tensor<f32>, vector<f32>
  return %1 : vector<f32>
}
//      CHECK: func @fold_extract_slice_with_transfer_read_0d
// CHECK-SAME:   %[[T:[a-zA-Z0-9_]+]]: tensor<12x32xf32>
// CHECK-SAME:   %[[OFF0:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:   %[[OFF1:[a-zA-Z0-9_]+]]: index
// CHECK-SAME:   %[[ARG3:[a-zA-Z0-9_]+]]: index
//      CHECK:   vector.transfer_read %[[T]][%[[OFF0]], %[[OFF1]]]

// -----

// Same slice as the 2-D case earlier in this file, but read with a 1-D vector.
//   CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)>

// CHECK-LABEL: func @transfer_read_of_extract_slice(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
//   CHECK-DAG:   %[[c8:.*]] = arith.constant 8 : index
//       CHECK:   %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s1]]]
//       CHECK:   %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true]} : tensor<?x?xf32>, vector<6xf32>
//       CHECK:   return %[[r]]
func.func @transfer_read_of_extract_slice(%t : tensor<?x?xf32>, %s1 : index, %s2 : index) -> vector<6xf32> {
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index
  %cst = arith.constant 0.0 : f32
  %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor<?x?xf32> to tensor<10x?xf32>
  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true]} : tensor<10x?xf32>, vector<6xf32>
  return %1 : vector<6xf32>
}

// -----

// Rank-reducing slice that drops the leading unit dimension: expected indices
// are [5, %s1 + 3, 6 + 4 = 10] on the 3-D source.
//   CHECK-DAG: #[[$ADD_3:.+]] = affine_map<()[s0] -> (s0 + 3)>

// CHECK-LABEL: func @transfer_read_of_extract_slice_rank_reducing(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
//   CHECK-DAG:   %[[c5:.*]] = arith.constant 5 : index
//   CHECK-DAG:   %[[c10:.*]] = arith.constant 10 : index
//       CHECK:   %[[add:.*]] = affine.apply #[[$ADD_3]]()[%[[s1]]]
//       CHECK:   %[[r:.*]] = vector.transfer_read %[[t]][%[[c5]], %[[add]], %[[c10]]], %{{.*}} {in_bounds = [true, true]} : tensor<?x?x?xf32>, vector<5x6xf32>
//       CHECK:   return %[[r]]
func.func @transfer_read_of_extract_slice_rank_reducing(%t : tensor<?x?x?xf32>, %s1 : index, %s2 : index) -> vector<5x6xf32> {
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index
  %cst = arith.constant 0.0 : f32
  %0 = tensor.extract_slice %t[5, %s1, 6] [1, %s2, 12] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x12xf32>
  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<?x12xf32>, vector<5x6xf32>
  return %1 : vector<5x6xf32>
}

// -----

// Rank-reducing slice that drops the middle unit dimension: the expected read
// carries a (d0, d1, d2) -> (d0, d2) permutation map to skip that dimension.
//   CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)>
//   CHECK-DAG: #[[$d0d2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>

// CHECK-LABEL: func @transfer_read_of_extract_slice_swappy_rank_reducing(
//  CHECK-SAME:     %[[t:.*]]: tensor<?x?x?xf32>, %[[s1:.*]]: index, %[[s2:.*]]: index
func.func @transfer_read_of_extract_slice_swappy_rank_reducing(%t : tensor<?x?x?xf32>, %s1 : index, %s2 : index) -> vector<5x6xf32> {
  %c3 = arith.constant 3 : index
  %c4 = arith.constant 4 : index
  %cst = arith.constant 0.0 : f32

//   CHECK-NOT:   extract_slice
//       CHECK:   %[[c8:.*]] = arith.constant 8 : index
//       CHECK:   %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s2]]]
//       CHECK:   %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[s1]], %[[add]]]
//  CHECK-SAME:     permutation_map = #[[$d0d2]]
//  CHECK-SAME:     tensor<?x?x?xf32>, vector<5x6xf32>
  %0 = tensor.extract_slice %t[5, %s1, %s2] [%s2, 1, 12] [1, 1, 1] : tensor<?x?x?xf32> to tensor<?x12xf32>
  %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<?x?x?xf32>, vector<5x6xf32>

  return %1 : vector<5x6xf32>
}

// -----

// transfer_write into a dynamically shaped tensor followed by a rank-extending
// insert_slice (leading unit dimension). The checks expect both ops to remain
// as written.
//      CHECK: func @fold_vector_transfer_write_with_rank_reduced_insert_slice
// CHECK-SAME:   %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?xf32>
// CHECK-SAME:   %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
// CHECK-SAME:   %[[ARG2:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG3:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG4:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG5:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG6:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG7:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG8:[a-zA-Z0-9]+]]: tensor<?x?xf32>
func.func @fold_vector_transfer_write_with_rank_reduced_insert_slice(
    %arg0 : tensor<?x?x?xf32>,
    %arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index,
    %arg5: index, %arg6 : index, %arg7 : index,
    %st : tensor<?x?xf32>) -> tensor<?x?x?xf32> {

  //  CHECK-DAG:  %[[r1:.*]] = vector.transfer_write %[[ARG1]], %[[ARG8]][%[[ARG6]], %[[ARG7]]] {in_bounds = [true]} : vector<4xf32>, tensor<?x?xf32>
  //  CHECK-DAG:  %[[r2:.*]] = tensor.insert_slice %[[r1]] into %[[ARG0]][0, %[[ARG2]], %[[ARG3]]] [1, %[[ARG4]], %[[ARG5]]] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
  %0 = vector.transfer_write %arg1, %st[%arg6, %arg7] {in_bounds = [true]}
    : vector<4xf32>, tensor<?x?xf32>
  %1 = tensor.insert_slice %0 into %arg0[0, %arg2, %arg3] [1, %arg4, %arg5] [1, 1, 1]
    : tensor<?x?xf32> into tensor<?x?x?xf32>
  return %1 : tensor<?x?x?xf32>
}

// -----

// Same as the previous case, but the unit dimension added by the insert_slice
// is the innermost one. The checks again expect both ops to remain as written.
//      CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice
// CHECK-SAME:   %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?x?xf32>
// CHECK-SAME:   %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32>
// CHECK-SAME:   %[[ARG2:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG3:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG4:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG5:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG6:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG7:[a-zA-Z0-9]+]]: index
// CHECK-SAME:   %[[ARG8:[a-zA-Z0-9]+]]: tensor<?x?xf32>
func.func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice(
    %arg0 : tensor<?x?x?xf32>,
    %arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index,
    %arg5: index, %arg6 : index, %arg7 : index,
    %st : tensor<?x?xf32>) -> tensor<?x?x?xf32> {

  //  CHECK-DAG:  %[[r1:.*]] = vector.transfer_write %[[ARG1]], %[[ARG8]][%[[ARG6]], %[[ARG7]]] {in_bounds = [true]} : vector<4xf32>, tensor<?x?xf32>
  //  CHECK-DAG:  %[[r2:.*]] = tensor.insert_slice %[[r1]] into %[[ARG0]][%[[ARG2]], %[[ARG3]], 0] [%[[ARG4]], %[[ARG5]], 1] [1, 1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
  %0 = vector.transfer_write %arg1, %st[%arg6, %arg7] {in_bounds = [true]}
    : vector<4xf32>, tensor<?x?xf32>
  %1 = tensor.insert_slice %0 into %arg0[%arg2, %arg3, 0] [%arg4, %arg5, 1] [1, 1, 1]
    : tensor<?x?xf32> into tensor<?x?x?xf32>
  return %1 : tensor<?x?x?xf32>
}

// -----

// The transfer_write covers the whole 5x6 temporary, so the checks expect a
// single transfer_write straight into %t1 at the insert_slice offsets.
// CHECK-LABEL: func @insert_slice_of_transfer_write(
//  CHECK-SAME:     %[[t1:.*]]: tensor<?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
func.func @insert_slice_of_transfer_write(%t1 : tensor<?x12xf32>, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor<?x12xf32> {
  %c0 = arith.constant 0 : index

//   CHECK-NOT:   insert_slice
//       CHECK:   %[[c3:.*]] = arith.constant 3 : index
//       CHECK:   %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<?x12xf32>
//       CHECK:   return %[[r]]
  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
  %1 = tensor.insert_slice %0 into %t1[3, %s] [5, 6] [1, 1] : tensor<5x6xf32> into tensor<?x12xf32>
  return %1 : tensor<?x12xf32>
}

// -----

// This test is negative since `transfer_write` only
// writes to `5x6` of the `100x100` elements of `%arg3`
// CHECK-LABEL: func @insert_slice_of_transfer_write_overwrite_all(
//  CHECK-SAME:     %[[arg0:.*]]: tensor<1000x1000xf32>, %[[arg1:.*]]: vector<5x6xf32>, %[[arg2:.*]]: index, %[[arg3:.*]]: tensor<100x100xf32>
func.func @insert_slice_of_transfer_write_overwrite_all(%arg0: tensor<1000x1000xf32>, %arg1: vector<5x6xf32>, %arg2: index, %arg3: tensor<100x100xf32>) -> tensor<1000x1000xf32> {
  %c0 = arith.constant 0 : index

//       CHECK:   %[[c0:.*]] = arith.constant 0 : index
//       CHECK:   %[[r1:.*]] = vector.transfer_write %[[arg1]], %[[arg3]][%[[c0]], %[[c0]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<100x100xf32>
//       CHECK:   %[[r2:.*]] = tensor.insert_slice %[[r1]] into %[[arg0]][3, %[[arg2]]] [100, 100] [1, 1] : tensor<100x100xf32> into tensor<1000x1000xf32>
//       CHECK:   return %[[r2]] : tensor<1000x1000xf32>
  %0 = vector.transfer_write %arg1, %arg3[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<100x100xf32>
  %inserted_slice = tensor.insert_slice %0 into %arg0[3, %arg2] [100, 100] [1, 1] : tensor<100x100xf32> into tensor<1000x1000xf32>
  return %inserted_slice : tensor<1000x1000xf32>
}

// -----

// Rank-extending insert_slice whose added unit dimension sits in the middle:
// the expected transfer_write carries a (d0, d1, d2) -> (d0, d2) permutation map.
//   CHECK-DAG: #[[$d0d2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>

// CHECK-LABEL: func @insert_slice_of_transfer_write_swappy_rank_extending(
//  CHECK-SAME:     %[[t1:.*]]: tensor<?x?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
func.func @insert_slice_of_transfer_write_swappy_rank_extending(
    %t1 : tensor<?x?x12xf32>, %v : vector<5x6xf32>,
    %s : index, %t2 : tensor<5x6xf32>) -> tensor<?x?x12xf32> {
  %c0 = arith.constant 0 : index

//   CHECK-NOT:   insert_slice
//   CHECK-DAG:   %[[c3:.*]] = arith.constant 3 : index
//   CHECK-DAG:   %[[c4:.*]] = arith.constant 4 : index
//       CHECK:   %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]]
//  CHECK-SAME:    {in_bounds = [true, true], permutation_map = #[[$d0d2]]} : vector<5x6xf32>, tensor<?x?x12xf32>
//       CHECK:   return %[[r]]
  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
  %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [5, 1, 6] [1, 1, 1] : tensor<5x6xf32> into tensor<?x?x12xf32>
  return %1 : tensor<?x?x12xf32>
}

// -----

// Rank-extending insert_slice whose added unit dimension is the leading one:
// the expected transfer_write needs no permutation map.
// CHECK-LABEL: func @insert_slice_of_transfer_write_rank_extending(
//  CHECK-SAME:     %[[t1:.*]]: tensor<?x?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
//   CHECK-DAG:   %[[c3:.*]] = arith.constant 3 : index
//   CHECK-DAG:   %[[c4:.*]] = arith.constant 4 : index
//       CHECK:   %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<?x?x12xf32>
//       CHECK:   return %[[r]]
func.func @insert_slice_of_transfer_write_rank_extending(%t1 : tensor<?x?x12xf32>, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor<?x?x12xf32> {
  %c0 = arith.constant 0 : index
  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32>
  %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [1, 5, 6] [1, 1, 1] : tensor<5x6xf32> into tensor<?x?x12xf32>
  return %1 : tensor<?x?x12xf32>
}

// -----

// Two chained insert_slice ops with matching sizes: the expected result is one
// insert_slice of %t into %r1 with the offsets summed ([1 + 3, 2 + %pos]).
//       CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 + 2)>
// CHECK-LABEL: func @insert_slice_of_insert_slice(
//  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
//  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
//  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
//       CHECK:   %[[add:.*]] = affine.apply #[[$map]]()[%[[pos]]]
//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][4, %[[add]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1x1xf32>, %r1: tensor<1x14xf32>, %pos: index)
    -> tensor<1x14xf32>
{
  %0 = tensor.insert_slice %t into %r0[1, 2] [1, 1] [1, 1]
    : tensor<f32> into tensor<1x1xf32>
  %1 = tensor.insert_slice %0 into %r1[3, %pos] [1, 1] [1, 1]
    : tensor<1x1xf32> into tensor<1x14xf32>
  return %1 : tensor<1x14xf32>
}

// -----

// Variant where the intermediate tensor is 1-D and the outer insert_slice is
// rank-extending: the expected offsets are [3, %pos + 2].
//   CHECK-DAG: #[[$map:.*]] = affine_map<()[s0] -> (s0 + 2)>
// CHECK-LABEL: func @insert_slice_of_insert_slice(
//  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
//  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
//  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
//       CHECK:   %[[composed_pos:.+]] = affine.apply #[[$map]]()[%[[pos]]]
//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][3, %[[composed_pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1xf32>, %r1: tensor<1x14xf32>, %pos: index)
    -> tensor<1x14xf32>
{
  %0 = tensor.insert_slice %t into %r0[2] [1] [1]
    : tensor<f32> into tensor<1xf32>
  %1 = tensor.insert_slice %0 into %r1[3, %pos] [1, 1] [1, 1]
    : tensor<1xf32> into tensor<1x14xf32>
  return %1 : tensor<1x14xf32>
}

// -----

// This test fails to fold because the size `4` and `%pos` do not match:
// this requires a copy
// CHECK-LABEL: func @fail_insert_slice_of_insert_slice(
//       CHECK:   tensor.insert_slice
//       CHECK:   tensor.insert_slice
func.func @fail_insert_slice_of_insert_slice(
  %t: tensor<4xf32>, %r0: tensor<?xf32>, %r1: tensor<?x?xf32>, %pos: index)
    -> tensor<?x?xf32>
{
  %0 = tensor.insert_slice %t into %r0[%pos] [4] [1]
    : tensor<4xf32> into tensor<?xf32>
  %1 = tensor.insert_slice %0 into %r1[%pos, 423] [%pos, 1] [1, 1]
    : tensor<?xf32> into tensor<?x?xf32>
  return %1 : tensor<?x?xf32>
}

// -----

// Here the sizes are the same and the folding occurs properly.
//       CHECK: #[[$map:.*]] = affine_map<()[s0] -> (s0 * 2)>
// CHECK-LABEL: func @insert_slice_of_insert_slice_dynamic(
//  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<?xf32>
//  CHECK-SAME:     %[[r0:[0-9a-z]*]]: tensor<?xf32>
//  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<?x?xf32>
//  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
//       CHECK:   %[[add:.*]] = affine.apply #[[$map]]()[%[[pos]]]
//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][%[[add]], 423] [%[[pos]], 1] [1, 1] : tensor<?xf32> into tensor<?x?xf32>
func.func @insert_slice_of_insert_slice_dynamic(
  %t: tensor<?xf32>, %r0: tensor<?xf32>, %r1: tensor<?x?xf32>, %pos: index)
    -> tensor<?x?xf32>
{
  %0 = tensor.insert_slice %t into %r0[%pos] [%pos] [1]
    : tensor<?xf32> into tensor<?xf32>
  %1 = tensor.insert_slice %0 into %r1[%pos, 423] [%pos, 1] [1, 1]
    : tensor<?xf32> into tensor<?x?xf32>
  return %1 : tensor<?x?xf32>
}

// -----

// insert_slice feeding a parallel_insert_slice inside scf.forall: the expected
// result inserts %tt directly into the shared output at offsets
// [%o0 + %o1, %o1].
//       CHECK: #[[$map:.*]] = affine_map<()[s0, s1] -> (s0 + s1)>
// CHECK-LABEL: func @parallel_insert_slice_of_insert_slice_dynamic(
//  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<12x34xf32>
//  CHECK-SAME:     %[[o0:[0-9a-z]*]]: index
//  CHECK-SAME:     %[[o1:[0-9a-z]*]]: index
//  CHECK-SAME:     %[[sz0:[0-9a-z]*]]: index
//  CHECK-SAME:     %[[sz1:[0-9a-z]*]]: index
func.func @parallel_insert_slice_of_insert_slice_dynamic(
  %t: tensor<12x34xf32>, %o0: index, %o1: index, %sz0: index, %sz1: index)
    -> tensor<12x34xf32>{

  // CHECK: scf.forall {{.*}} shared_outs(%[[out:.*]] = %[[t]]
  %0 = scf.forall (%arg0, %arg1) in (27, 8) shared_outs(%arg2 = %t) -> (tensor<12x34xf32>) {
    // CHECK: %[[tt:.*]] = "make_me_a_tensor"() : () -> tensor<?x?xf32>
    %tt = "make_me_a_tensor"() : () -> tensor<?x?xf32>
    %tt2 = "make_me_another_tensor"() : () -> tensor<?x?xf32>
    %inserted_slice = tensor.insert_slice %tt into %tt2[%o1, 0] [%sz0, %sz1] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

    //      CHECK: %[[add:.*]] = affine.apply #[[$map]]()[%[[o0]], %[[o1]]]
    //      CHECK: scf.forall.in_parallel
    //      CHECK:   tensor.parallel_insert_slice %[[tt]] into %[[out]][%[[add]], %[[o1]]] [%[[sz0]], %[[sz1]]] [1, 1]
    // CHECK-SAME:     : tensor<?x?xf32> into tensor<12x34xf32>
    scf.forall.in_parallel {
      tensor.parallel_insert_slice %inserted_slice into %arg2[%o0, %o1] [%sz0, %sz1] [1, 1]
        : tensor<?x?xf32> into tensor<12x34xf32>
    }
  }
  return %0: tensor<12x34xf32>
}