// RUN: mlir-opt %s -split-input-file -async-parallel-for=async-dispatch=true \
// RUN: | FileCheck %s --dump-input=always

// This test runs the async-parallel-for pass with async-dispatch enabled and
// verifies the structure of the generated IR for 1-D and 2-D scf.parallel
// loops: the trip-count guard, the outlined @parallel_compute_fn, and the
// recursive @async_dispatch_fn.

// 1-D case: the loop bounds and step are function arguments.

// CHECK-LABEL: @loop_1d(
// CHECK-SAME:    %[[LB:.*]]: index, %[[UB:.*]]: index, %[[STEP:.*]]: index
func.func @loop_1d(%arg0: index, %arg1: index, %arg2: index, %arg3: memref<?xf32>) {
  // CHECK:      %[[C0:.*]] = arith.constant 0 : index

  // The trip count is computed from the bounds and compared against zero.
  // CHECK:      %[[RANGE:.*]] = arith.subi %[[UB]], %[[LB]]
  // CHECK:      %[[TRIP_CNT:.*]] = arith.ceildivsi %[[RANGE]], %[[STEP]]
  // CHECK:      %[[IS_NOOP:.*]] = arith.cmpi eq, %[[TRIP_CNT]], %[[C0]] : index

  // A zero trip count takes the empty `then` branch; otherwise the work is
  // either run through a direct call to the compute function or dispatched
  // through an async group that is awaited afterwards.
  // CHECK:      scf.if %[[IS_NOOP]] {
  // CHECK-NEXT: } else {
  // CHECK:        scf.if {{.*}} {
  // CHECK:          call @parallel_compute_fn(%[[C0]]
  // CHECK:        } else {
  // CHECK:          %[[GROUP:.*]] = async.create_group
  // CHECK:          call @async_dispatch_fn
  // CHECK:          async.await_all %[[GROUP]]
  // CHECK:        }
  // CHECK:      }
  scf.parallel (%i) = (%arg0) to (%arg1) step (%arg2) {
    %one = arith.constant 1.0 : f32
    memref.store %one, %arg3[%i] : memref<?xf32>
  }
  return
}

// The outlined compute function contains a sequential loop around the
// original loop body.

// CHECK-LABEL: func private @parallel_compute_fn
// CHECK:       scf.for
// CHECK:       memref.store

// The dispatch function takes the async group and a [start, end) block range.

// CHECK-LABEL: func private @async_dispatch_fn
// CHECK-SAME:  (
// CHECK-SAME:    %[[GROUP:arg0]]: !async.group,
// CHECK-SAME:    %[[BLOCK_START:arg1]]: index
// CHECK-SAME:    %[[BLOCK_END:arg2]]: index
// CHECK-SAME:  )
// CHECK:       %[[C1:.*]] = arith.constant 1 : index
// CHECK:       %[[C2:.*]] = arith.constant 2 : index
// CHECK:       scf.while (%[[S0:.*]] = %[[BLOCK_START]],
// CHECK-SAME:             %[[E0:.*]] = %[[BLOCK_END]])
// The `before` block of the while loop decides whether more tasks need to be
// dispatched.
// CHECK:       {
// CHECK:         %[[DIFF0:.*]] = arith.subi %[[E0]], %[[S0]]
// CHECK:         %[[COND:.*]] = arith.cmpi sgt, %[[DIFF0]], %[[C1]]
// CHECK:         scf.condition(%[[COND]])
// The `after` block of the while loop splits the range in half and submits an
// async task that processes the second half by calling the same dispatch
// function.
// CHECK:       } do {
// CHECK:       ^bb0(%[[S1:.*]]: index, %[[E1:.*]]: index):
// CHECK:         %[[DIFF1:.*]] = arith.subi %[[E1]], %[[S1]]
// CHECK:         %[[HALF:.*]] = arith.divsi %[[DIFF1]], %[[C2]]
// CHECK:         %[[MID:.*]] = arith.addi %[[S1]], %[[HALF]]
// CHECK:         %[[TOKEN:.*]] = async.execute
// CHECK:           call @async_dispatch_fn
// CHECK:         async.add_to_group
// CHECK:         scf.yield %[[S1]], %[[MID]]
// CHECK:       }
// After the async dispatch, the first block is processed in the caller thread.
// CHECK:       call @parallel_compute_fn(%[[BLOCK_START]]

// -----

// 2-D case: each dimension has its own lower bound, upper bound, and step.

// CHECK-LABEL: @loop_2d
func.func @loop_2d(%arg0: index, %arg1: index, %arg2: index, // lb, ub, step
                   %arg3: index, %arg4: index, %arg5: index, // lb, ub, step
                   %arg6: memref<?x?xf32>) {
  // CHECK: %[[GROUP:.*]] = async.create_group
  // CHECK: call @async_dispatch_fn
  // CHECK: async.await_all %[[GROUP]]
  scf.parallel (%i0, %i1) = (%arg0, %arg3) to (%arg1, %arg4)
                            step (%arg2, %arg5) {
    %one = arith.constant 1.0 : f32
    memref.store %one, %arg6[%i0, %i1] : memref<?x?xf32>
  }
  return
}

// The outlined compute function for the 2-D loop contains two sequential
// loops before the store.

// CHECK-LABEL: func private @parallel_compute_fn
// CHECK:       scf.for
// CHECK:       scf.for
// CHECK:       memref.store

// CHECK-LABEL: func private @async_dispatch_fn