// RUN: mlir-opt -gpu-map-parallel-loops -split-input-file %s | FileCheck %s

// Two nested 2-D parallel loops: the greedy mapper assigns the inner nest to
// threads (x/y) and the outer nest to blocks (x/y).
func.func @parallel_loop(%arg0 : index, %arg1 : index, %arg2 : index,
                         %arg3 : index) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  scf.parallel (%i0, %i1) = (%arg0, %arg1) to (%arg2, %arg3)
                            step (%four, %four) {
    scf.parallel (%si0, %si1) = (%zero, %zero) to (%four, %four)
                                step (%one, %one) {
    }
  }
  return
}

// CHECK-LABEL: func @parallel_loop(
// CHECK:         scf.parallel
// CHECK:           scf.parallel
// CHECK:      {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
// CHECK:      {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
// CHECK-NOT: mapping

// -----

// Three nested 4-D parallel loops: only three hardware dimensions are
// available per nesting level, so the innermost nest becomes fully
// sequential, the middle nest maps to threads (x/y/z) plus one sequential
// dimension, and the outermost nest maps to blocks (x/y/z) plus one
// sequential dimension.
func.func @parallel_loop_4d(%arg0 : index, %arg1 : index, %arg2 : index,
                            %arg3 : index) {
  %zero = arith.constant 0 : index
  %one = arith.constant 1 : index
  %four = arith.constant 4 : index
  scf.parallel (%i0, %i1, %i2, %i3) = (%zero, %zero, %zero, %zero) to (%arg0, %arg1, %arg2, %arg3)
                                      step (%four, %four, %four, %four) {
    scf.parallel (%si0, %si1, %si2, %si3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four)
                                            step (%one, %one, %one, %one) {
      scf.parallel (%ti0, %ti1, %ti2, %ti3) = (%zero, %zero, %zero, %zero) to (%four, %four, %four, %four)
                                              step (%one, %one, %one, %one) {
      }
    }
  }
  return
}

// CHECK-LABEL: func @parallel_loop_4d(
// CHECK:         scf.parallel
// CHECK:           scf.parallel
// CHECK:             scf.parallel
// CHECK:      {mapping = [#gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
// CHECK:      {mapping = [#gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = thread_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = thread_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
// CHECK:      {mapping = [#gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = block_y, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = block_z, map = (d0) -> (d0), bound = (d0) -> (d0)>,
// CHECK-SAME:             #gpu.loop_dim_map<processor = sequential, map = (d0) -> (d0), bound = (d0) -> (d0)>]}
// CHECK-NOT: mapping