// RUN: mlir-opt -allow-unregistered-dialect -split-input-file %s | FileCheck %s
// RUN: mlir-opt -allow-unregistered-dialect -split-input-file %s -mlir-print-op-generic | FileCheck -check-prefix=GENERIC %s

// Attribute dictionaries attached to affine operations must survive a
// parse/print round trip, and the implicit `affine.yield` terminator must be
// printed once the operations are emitted in generic form.
// CHECK-LABEL: @empty
func.func @empty() {
  // Loop with an empty body followed by a trailing attribute dictionary.
  // CHECK: affine.for
  // CHECK-NEXT: } {some_attr = true}
  //
  // GENERIC: "affine.for"()
  // GENERIC-NEXT: ^bb0(%{{.*}}: index):
  // GENERIC-NEXT: "affine.yield"() : () -> ()
  // GENERIC-NEXT: })
  affine.for %iv = 0 to 10 {
  } {some_attr = true}

  // Conditional written with a `then` region only; the generic form is
  // expected to print a second, empty region as well.
  // CHECK: affine.if
  // CHECK-NEXT: } {some_attr = true}
  //
  // GENERIC: "affine.if"()
  // GENERIC-NEXT: "affine.yield"() : () -> ()
  // GENERIC-NEXT: }, {
  // GENERIC-NEXT: })
  affine.if affine_set<() : ()> () {
  } {some_attr = true}

  // Conditional whose `else` region holds an unregistered operation; both
  // regions are expected to end with a terminator in generic form.
  // CHECK: } else {
  // CHECK: } {some_attr = true}
  //
  // GENERIC: "affine.if"()
  // GENERIC-NEXT: "affine.yield"() : () -> ()
  // GENERIC-NEXT: }, {
  // GENERIC-NEXT: "foo"() : () -> ()
  // GENERIC-NEXT: "affine.yield"() : () -> ()
  // GENERIC-NEXT: })
  affine.if affine_set<() : ()> () {
  } else {
    "foo"() : () -> ()
  } {some_attr = true}

  return
}

// -----

// GENERIC: #[[$map:.*]] = affine_map<() -> (0)>
// GENERIC: #[[$map1:.*]] = affine_map<() -> (10)>

// An `affine.yield` that is spelled out explicitly must be elided by the
// custom printer, and printing must not introduce a second terminator.
// CHECK-LABEL: @affine.yield
// The generic printer emits the function name only as the `sym_name` string
// attribute, so the label for the generic-form run anchors on that spelling.
// (The previous directive used a prefix that no FileCheck invocation of this
// file enables, so it was silently ignored.)
// GENERIC-LABEL: sym_name = "affine.yield"
func.func @affine.yield() {
  // CHECK: affine.for
  // CHECK-NEXT: }
  //
  // GENERIC: "affine.for"() <{lowerBoundMap = #[[$map]], operandSegmentSizes = array<i32: 0, 0, 0>, step = 1 : index, upperBoundMap = #[[$map1]]}> ({
  // GENERIC-NEXT: ^bb0(%{{.*}}: index):
  // GENERIC-NEXT: "affine.yield"() : () -> ()
  // GENERIC-NEXT: }) : () -> ()
  affine.for %i = 0 to 10 {
    "affine.yield"() : () -> ()
  }
  return
}

// -----

// The four maps below are shared by the `affine.min` and `affine.max` tests
// that follow; they are matched with DAG directives because the order in which
// the aliases are printed is not pinned here.
// CHECK-DAG: #[[$MAP0:map[0-9]*]] = affine_map<(d0)[s0] -> (1000, d0 + 512, s0)>
// CHECK-DAG: #[[$MAP1:map[0-9]*]] = affine_map<(d0, d1)[s0] -> (d0 - d1, s0 + 512)>
// CHECK-DAG: #[[$MAP2:map[0-9]*]] = affine_map<()[s0, s1] -> (s0 - s1, 11)>
// CHECK-DAG: #[[$MAP3:map[0-9]*]] = affine_map<() -> (77, 78, 79)>

// Round trip of `affine.min` with dims and symbols, dims only on the left-hand
// side, symbols only, and neither.
// CHECK-LABEL: @affine_min
func.func @affine_min(%arg0 : index, %arg1 : index, %arg2 : index) {
  // CHECK: affine.min #[[$MAP0]](%arg0)[%arg1]
  %0 = affine.min affine_map<(d0)[s0] -> (1000, d0 + 512, s0)> (%arg0)[%arg1]
  // CHECK: affine.min #[[$MAP1]](%arg0, %arg1)[%arg2]
  %1 = affine.min affine_map<(d0, d1)[s0] -> (d0 - d1, s0 + 512)> (%arg0, %arg1)[%arg2]
  // CHECK: affine.min #[[$MAP2]]()[%arg1, %arg2]
  %2 = affine.min affine_map<()[s0, s1] -> (s0 - s1, 11)> ()[%arg1, %arg2]
  // CHECK: affine.min #[[$MAP3]]()
  %3 = affine.min affine_map<()[] -> (77, 78, 79)> ()[]
  return
}

// Same four operand configurations as above, for `affine.max`.
// CHECK-LABEL: @affine_max
func.func @affine_max(%arg0 : index, %arg1 : index, %arg2 : index) {
  // CHECK: affine.max #[[$MAP0]](%arg0)[%arg1]
  %0 = affine.max affine_map<(d0)[s0] -> (1000, d0 + 512, s0)> (%arg0)[%arg1]
  // CHECK: affine.max #[[$MAP1]](%arg0, %arg1)[%arg2]
  %1 = affine.max affine_map<(d0, d1)[s0] -> (d0 - d1, s0 + 512)> (%arg0, %arg1)[%arg2]
  // CHECK: affine.max #[[$MAP2]]()[%arg1, %arg2]
  %2 = affine.max affine_map<()[s0, s1] -> (s0 - s1, 11)> ()[%arg1, %arg2]
  // CHECK: affine.max #[[$MAP3]]()
  %3 = affine.max affine_map<()[] -> (77, 78, 79)> ()[]
  return
}

// -----

// This function carries no FileCheck directives of its own: it only has to be
// accepted by mlir-opt. The loop bounds are `memref.dim` results, including
// one taken from a subview created inside the loop nest.
func.func @valid_symbols(%arg0: index, %arg1: index, %arg2: index) {
  %c1 = arith.constant 1 : index
  %c0 = arith.constant 0 : index
  %0 = memref.alloc(%arg0, %arg1) : memref<?x?xf32>
  affine.for %arg3 = 0 to %arg2 step 768 {
    %13 = memref.dim %0, %c1 : memref<?x?xf32>
    affine.for %arg4 = 0 to %13 step 264 {
      %18 = memref.dim %0, %c0 : memref<?x?xf32>
      %20 = memref.subview %0[%c0, %c0][%18,%arg4][%c1,%c1] : memref<?x?xf32>
                        to memref<?x?xf32, strided<[?, ?], offset: ?>>
      %24 = memref.dim %20, %c0 : memref<?x?xf32, strided<[?, ?], offset: ?>>
      affine.for %arg5 = 0 to %24 step 768 {
        "foo"() : () -> ()
      }
    }
  }
  return
}

// -----

// Test symbol constraints for ops with AffineScope trait.

// CHECK-LABEL: func @valid_symbol_affine_scope
func.func @valid_symbol_affine_scope(%n : index, %A : memref<?xf32>) {
  test.affine_scope {
    %c1 = arith.constant 1 : index
    %l = arith.subi %n, %c1 : index
    // %l, %n are valid symbols since test.affine_scope defines a new affine
    // scope.
    affine.for %i = %l to %n {
      %m = arith.subi %l, %i : index
      test.affine_scope {
        // %m and %n are valid symbols.
        affine.for %j = %m to %n {
          %v = affine.load %A[%n - 1] : memref<?xf32>
          affine.store %v, %A[%n - 1] : memref<?xf32>
        }
        "terminate"() : () -> ()
      }
    }
    "terminate"() : () -> ()
  }
  return
}

// -----

// Test the fact that module op always provides an affine scope.

%top_level_idx = "test.foo"() : () -> (index)
"test.func"() ({
^bb0(%buf : memref<?xf32>):
  affine.load %buf[%top_level_idx] : memref<?xf32>
  "terminate"() : () -> ()
}) : () -> ()

// -----

// Nested `affine.parallel`: the outer loop has a symbolic upper bound and
// explicit steps, the inner one produces two reduced values.
// CHECK-LABEL: func @parallel
// CHECK-SAME: (%[[A:.*]]: memref<100x100xf32>, %[[N:.*]]: index)
func.func @parallel(%A : memref<100x100xf32>, %N : index) {
  // CHECK: affine.parallel (%[[I0:.*]], %[[J0:.*]]) = (0, 0) to (symbol(%[[N]]), 100) step (10, 10)
  affine.parallel (%i0, %j0) = (0, 0) to (symbol(%N), 100) step (10, 10) {
    // CHECK: affine.parallel (%{{.*}}, %{{.*}}) = (%[[I0]], %[[J0]]) to (%[[I0]] + 10, %[[J0]] + 10) reduce ("minimumf", "maximumf") -> (f32, f32)
    %red:2 = affine.parallel (%i1, %j1) = (%i0, %j0) to (%i0 + 10, %j0 + 10) reduce ("minimumf", "maximumf") -> (f32, f32) {
      %elem = affine.load %A[%i0 + %i0, %j0 + %j1] : memref<100x100xf32>
      affine.yield %elem, %elem : f32, f32
    }
  }
  return
}

// -----

// `affine.parallel` whose bounds use the `max(...)` / `min(...)` syntax.
// CHECK-LABEL: @parallel_min_max
// CHECK: %[[A:.*]]: index, %[[B:.*]]: index, %[[C:.*]]: index, %[[D:.*]]: index
func.func @parallel_min_max(%a: index, %b: index, %c: index, %d: index) {
  // CHECK: affine.parallel (%{{.*}}, %{{.*}}, %{{.*}}) =
  // CHECK: (max(%[[A]], %[[B]])
  // CHECK: to (%[[C]], min(%[[C]], %[[D]]), %[[B]])
  affine.parallel (%x, %y, %z) = (max(%a, %b), %b, max(%a, %c))
                              to (%c, min(%c, %d), %b) {
    affine.yield
  }
  return
}

// -----

// `affine.parallel` with an empty induction-variable list.
// CHECK-LABEL: @parallel_no_ivs
func.func @parallel_no_ivs() {
  // CHECK: affine.parallel () = () to ()
  affine.parallel () = () to () {
    affine.yield
  }
  return
}

// -----

// `affine.if` that returns a value from both of its regions.
// CHECK-LABEL: func @affine_if
func.func @affine_if() -> f32 {
  // CHECK: %[[ZERO:.*]] = arith.constant {{.*}} : f32
  %cst = arith.constant 0.0 : f32
  // CHECK: %[[OUT:.*]] = affine.if {{.*}}() -> f32 {
  %result = affine.if affine_set<() : ()> () -> f32 {
    // CHECK: affine.yield %[[ZERO]] : f32
    affine.yield %cst : f32
  } else {
    // CHECK: affine.yield %[[ZERO]] : f32
    affine.yield %cst : f32
  }
  // CHECK: return %[[OUT]] : f32
  return %result : f32
}

// -----

// Loops that carry values through `iter_args` and hand them back with
// `affine.yield`.

#set = affine_set<(d0): (d0 - 10 >= 0)>

// CHECK-LABEL: func @yield_loop
func.func @yield_loop(%buffer: memref<1024xf32>) -> f32 {
  %zero = arith.constant 0.0 : f32
  %total = affine.for %i = 0 to 10 step 2 iter_args(%acc = %zero) -> f32 {
    %elem = affine.load %buffer[%i] : memref<1024xf32>
    %acc_next = affine.if #set(%i) -> (f32) {
      %bumped = arith.addf %acc, %elem : f32
      affine.yield %bumped : f32
    } else {
      affine.yield %acc : f32
    }
    affine.yield %acc_next : f32
  }
  return %total : f32
}
// CHECK: %[[const_0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT: %[[output:.*]] = affine.for %{{.*}} = 0 to 10 step 2 iter_args(%{{.*}} = %[[const_0]]) -> (f32) {
// CHECK: affine.if #set(%{{.*}}) -> f32 {
// CHECK: affine.yield %{{.*}} : f32
// CHECK-NEXT: } else {
// CHECK-NEXT: affine.yield %{{.*}} : f32
// CHECK-NEXT: }
// CHECK-NEXT: affine.yield %{{.*}} : f32
// CHECK-NEXT: }
// CHECK-NEXT: return %[[output]] : f32

// CHECK-LABEL: func @affine_for_multiple_yield
func.func @affine_for_multiple_yield(%buffer: memref<1024xf32>) -> (f32, f32) {
  %init = arith.constant 0.0 : f32
  %out0, %out1 = affine.for %i = 0 to 10 step 2 iter_args(%acc0 = %init, %acc1 = %init) -> (f32, f32) {
    %elem = affine.load %buffer[%i] : memref<1024xf32>
    %next0 = arith.addf %elem, %acc0 : f32
    %next1 = arith.addf %elem, %acc1 : f32
    affine.yield %next0, %next1 : f32, f32
  }
  return %out0, %out1 : f32, f32
}
// CHECK: %[[const_0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK-NEXT: %[[output:[0-9]+]]:2 = affine.for %{{.*}} = 0 to 10 step 2 iter_args(%[[iter_arg1:.*]] = %[[const_0]], %[[iter_arg2:.*]] = %[[const_0]]) -> (f32, f32) {
// CHECK: %[[res1:.*]] = arith.addf %{{.*}}, %[[iter_arg1]] : f32
// CHECK-NEXT: %[[res2:.*]] = arith.addf %{{.*}}, %[[iter_arg2]] : f32
// CHECK-NEXT: affine.yield %[[res1]], %[[res2]] : f32, f32
// CHECK-NEXT: }

// -----

// `affine.delinearize_index` with an all-dynamic basis.
// CHECK-LABEL: func @delinearize
func.func @delinearize(%linear_idx: index, %basis0: index, %basis1 :index) -> (index, index) {
  // CHECK: affine.delinearize_index %{{.+}} into (%{{.+}}, %{{.+}}) : index, index
  %parts:2 = affine.delinearize_index %linear_idx into (%basis0, %basis1) : index, index
  return %parts#0, %parts#1 : index, index
}

// `affine.delinearize_index` with a basis mixing constants and an SSA value.
// CHECK-LABEL: @delinearize_mixed
func.func @delinearize_mixed(%linear_idx: index, %basis1: index) -> (index, index, index) {
  // CHECK: affine.delinearize_index %{{.+}} into (2, %{{.+}}, 3) : index, index, index
  %parts:3 = affine.delinearize_index %linear_idx into (2, %basis1, 3) : index, index, index
  return %parts#0, %parts#1, %parts#2 : index, index, index
}

// -----

// `affine.linearize_index` with an all-dynamic basis.
// CHECK-LABEL: func @linearize
func.func @linearize(%index0: index, %index1: index, %basis0: index, %basis1 :index) -> index {
  // CHECK: affine.linearize_index [%{{.+}}, %{{.+}}] by (%{{.+}}, %{{.+}}) : index
  %flat = affine.linearize_index [%index0, %index1] by (%basis0, %basis1) : index
  return %flat : index
}

// `affine.linearize_index` carrying the `disjoint` keyword and a mixed basis.
// CHECK-LABEL: @linearize_mixed
func.func @linearize_mixed(%index0: index, %index1: index, %index2: index, %basis1: index) -> index {
  // CHECK: affine.linearize_index disjoint [%{{.+}}, %{{.+}}, %{{.+}}] by (2, %{{.+}}, 3) : index
  %flat = affine.linearize_index disjoint [%index0, %index1, %index2] by (2, %basis1, 3) : index
  return %flat : index
}

// -----

#map = affine_map<()[s0] -> (s0)>

// `affine.for` inside a `gpu.func`, with a `memref.dim` result as the symbol
// of its upper-bound map.
// CHECK-LABEL: @gpu_affine_for

module attributes {gpu.container_module} {
  gpu.module @gpu {
    gpu.func @gpu_affine_for(%arg0: memref<?x?xf32>) kernel {
      %c1 = arith.constant 1 : index
      %dim = memref.dim %arg0, %c1 : memref<?x?xf32>
      %c0 = arith.constant 0 : index
      affine.for %iv = %c0 to #map()[%dim] step 32 {
      }
      gpu.return
    }
  }
}
// CHECK-SAME: (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
// CHECK: %[[VAL_1:.*]] = arith.constant 1 : index
// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
// CHECK: }
// CHECK: gpu.return

// -----

#map = affine_map<()[s0] -> (s0 mod 32)>

// CHECK: #[[$ATTR_0:.+]] = affine_map<()[s0] -> (s0 mod 32)>

// A `gpu.thread_id` result feeds `affine.apply`, whose result is then used as
// the lower bound of an inner `affine.for`.
// CHECK-LABEL: gpu.func @affine_thread_id

module {
  gpu.module @gpu {
    gpu.func @affine_thread_id(%arg0: memref<?x?xf32>) kernel {
      %c3 = arith.constant 3 : index
      %dim = memref.dim %arg0, %c3 : memref<?x?xf32>
      %c0 = arith.constant 0 : index
      affine.for %outer = %c0 to %dim step 32 {
        %tid_x = gpu.thread_id x
        %lane = affine.apply #map()[%tid_x]
        %c128 = arith.constant 128 : index
        affine.for %inner = %lane to %c128 step 8 {
          %c32 = arith.constant 32 : index
        }
      }
      gpu.return
    }
  }
}

// CHECK-SAME: (%[[VAL_0:.*]]: memref<?x?xf32>) kernel {
// CHECK: %[[VAL_1:.*]] = arith.constant 3 : index
// CHECK: %[[VAL_2:.*]] = memref.dim %[[VAL_0]], %[[VAL_1]] : memref<?x?xf32>
// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
// CHECK: affine.for %[[VAL_4:.*]] = %[[VAL_3]] to %[[VAL_2]] step 32 {
// CHECK: %[[VAL_5:.*]] = gpu.thread_id x
// CHECK: %[[VAL_6:.*]] = affine.apply #[[$ATTR_0]](){{\[}}%[[VAL_5]]]
// CHECK: %[[VAL_7:.*]] = arith.constant 128 : index
// CHECK: affine.for %{{.*}} = %[[VAL_6]] to %[[VAL_7]] step 8 {

// -----

#map = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>

// An `arith.addi` result computed inside an enclosing loop is passed as the
// dim operand of the inner loop's upper-bound map.
// CHECK-LABEL: func @arith_add_vaild_symbol_upper_bound

func.func @arith_add_vaild_symbol_upper_bound(%arg : index) {
  affine.for %i = 0 to 7 {
    %sum = arith.addi %arg, %arg : index
    affine.for %j = 0 to #map(%sum)[%arg] {
    }
  }
  return
}

// CHECK-SAME: %[[VAL_0:.*]]: index) {
// CHECK: affine.for %[[VAL_1:.*]] = 0 to 7 {
// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_0]] : index
// CHECK: affine.for %[[VAL_3:.*]] = 0 to #[[$ATTR_0]](%[[VAL_2]]){{\[}}%[[VAL_0]]] {
// CHECK: }
// CHECK: }

// -----

#map = affine_map<(d0)[s0] -> (d0 + s0)>

// CHECK: #[[$ATTR_0:.+]] = affine_map<(d0)[s0] -> (d0 + s0)>

// Same as the previous test, with the map applied to the lower bound instead.
// CHECK-LABEL: func @arith_add_vaild_symbol_lower_bound

func.func @arith_add_vaild_symbol_lower_bound(%arg : index) {
  affine.for %i = 0 to 7 {
    %sum = arith.addi %arg, %arg : index
    affine.for %j = #map(%sum)[%arg] to 7 {
    }
  }
  return
}

// CHECK-SAME: %[[VAL_0:.*]]: index) {
// CHECK: affine.for %[[VAL_1:.*]] = 0 to 7 {
// CHECK: %[[VAL_2:.*]] = arith.addi %[[VAL_0]], %[[VAL_0]] : index
// CHECK: affine.for %[[VAL_3:.*]] = #[[$ATTR_0]](%[[VAL_2]]){{\[}}%[[VAL_0]]] to 7 {
// CHECK: }
// CHECK: }