xref: /llvm-project/mlir/test/Dialect/SCF/parallel-loop-tiling.mlir (revision 38c219b4a8ebe30d781a1ebbb9a9d29b24c28b39)
1// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-parallel-loop-tiling{parallel-loop-tile-sizes=1,4}))' -split-input-file | FileCheck %s
2
// Input for the fully dynamic case: every lower bound, upper bound and step
// of the 2-D scf.parallel below is a function argument. The body stores
// B[i, j] + C[i, j] into result[i, j]; %A is accepted but never read.
func.func @parallel_loop(
    %arg0 : index, %arg1 : index, %arg2 : index,
    %arg3 : index, %arg4 : index, %arg5 : index,
    %A: memref<?x?xf32>, %B: memref<?x?xf32>,
    %C: memref<?x?xf32>, %result: memref<?x?xf32>) {
  // (%arg0, %arg1) are the lower bounds, (%arg2, %arg3) the upper bounds and
  // (%arg4, %arg5) the steps of the two parallel dimensions.
  scf.parallel (%row, %col) = (%arg0, %arg1) to (%arg2, %arg3)
      step (%arg4, %arg5) {
    %lhs = memref.load %B[%row, %col] : memref<?x?xf32>
    %rhs = memref.load %C[%row, %col] : memref<?x?xf32>
    %total = arith.addf %lhs, %rhs : f32
    memref.store %total, %result[%row, %col] : memref<?x?xf32>
  }
  return
}
15
16// CHECK:       #[[$MAP:.*]] = affine_map<(d0, d1, d2) -> (d0, d1 - d2)>
17// CHECK-LABEL:   func @parallel_loop(
18// CHECK-SAME:                        [[ARG1:%.*]]: index, [[ARG2:%.*]]: index, [[ARG3:%.*]]: index, [[ARG4:%.*]]: index, [[ARG5:%.*]]: index, [[ARG6:%.*]]: index, [[ARG7:%.*]]: memref<?x?xf32>, [[ARG8:%.*]]: memref<?x?xf32>, [[ARG9:%.*]]: memref<?x?xf32>, [[ARG10:%.*]]: memref<?x?xf32>) {
19// CHECK:           [[C0:%.*]] = arith.constant 0 : index
20// CHECK:           [[C1:%.*]] = arith.constant 1 : index
21// CHECK:           [[C4:%.*]] = arith.constant 4 : index
22// CHECK:           [[V1:%.*]] = arith.muli [[ARG5]], [[C1]] : index
23// CHECK:           [[V2:%.*]] = arith.muli [[ARG6]], [[C4]] : index
24// CHECK:           scf.parallel ([[V3:%.*]], [[V4:%.*]]) = ([[ARG1]], [[ARG2]]) to ([[ARG3]], [[ARG4]]) step ([[V1]], [[V2]]) {
25// CHECK:             [[V5:%.*]] = affine.min #[[$MAP]]([[V1]], [[ARG3]], [[V3]])
26// CHECK:             [[V6:%.*]] = affine.min #[[$MAP]]([[V2]], [[ARG4]], [[V4]])
27// CHECK:             scf.parallel ([[V7:%.*]], [[V8:%.*]]) = ([[C0]], [[C0]]) to ([[V5]], [[V6]]) step ([[ARG5]], [[ARG6]]) {
28// CHECK:               [[V9:%.*]] = arith.addi [[V7]], [[V3]] : index
29// CHECK:               [[V10:%.*]] = arith.addi [[V8]], [[V4]] : index
30// CHECK:               [[V11:%.*]] = memref.load [[ARG8]]{{\[}}[[V9]], [[V10]]] : memref<?x?xf32>
31// CHECK:               [[V12:%.*]] = memref.load [[ARG9]]{{\[}}[[V9]], [[V10]]] : memref<?x?xf32>
32// CHECK:               [[V13:%.*]] = arith.addf [[V11]], [[V12]] : f32
33// CHECK:               memref.store [[V13]], [[ARG10]]{{\[}}[[V9]], [[V10]]] : memref<?x?xf32>
34// CHECK:             }
35// CHECK:           }
36// CHECK:           return
37
38// -----
39
// Input for the fully static case: a 2-D scf.parallel over [0, 22) x [0, 24)
// with a constant step of 3 in both dimensions and an empty body.
// The constants are defined in the order 0, 3, 22, 24; the expectations that
// follow match them in that order, so keep it when editing.
func.func @static_loop_with_step() {
  %zero = arith.constant 0 : index
  %stride = arith.constant 3 : index
  %limit0 = arith.constant 22 : index
  %limit1 = arith.constant 24 : index
  scf.parallel (%x, %y) = (%zero, %zero) to (%limit0, %limit1)
      step (%stride, %stride) {
  }
  return
}
49
50// CHECK-LABEL:   func @static_loop_with_step() {
51// CHECK:           [[C0:%.*]] = arith.constant 0 : index
52// CHECK:           [[C3:%.*]] = arith.constant 3 : index
53// CHECK:           [[C22:%.*]] = arith.constant 22 : index
54// CHECK:           [[C24:%.*]] = arith.constant 24 : index
55// CHECK:           [[C0_1:%.*]] = arith.constant 0 : index
56// CHECK:           [[C1:%.*]] = arith.constant 1 : index
57// CHECK:           [[C4:%.*]] = arith.constant 4 : index
58// CHECK:           [[V1:%.*]] = arith.muli [[C3]], [[C1]] : index
59// CHECK:           [[V2:%.*]] = arith.muli [[C3]], [[C4]] : index
60// CHECK:           scf.parallel ([[V3:%.*]], [[V4:%.*]]) = ([[C0]], [[C0]]) to ([[C22]], [[C24]]) step ([[V1]], [[V2]]) {
61// CHECK:             scf.parallel ([[V5:%.*]], [[V6:%.*]]) = ([[C0_1]], [[C0_1]]) to ([[V1]], [[V2]]) step ([[C3]], [[C3]]) {
62// CHECK:               = arith.addi [[V5]], [[V3]] : index
63// CHECK:               = arith.addi [[V6]], [[V4]] : index
64// CHECK:             }
65// CHECK:           }
66// CHECK:           return
67
68// -----
69
// Input with two sibling 2-D parallel loops over [0, 2) x [0, 2), unit step:
//   1. a parallel loop whose body holds a second, empty parallel loop;
//   2. a standalone, empty parallel loop.
// The constants are defined in the order 2, 0, 1; the expectations that
// follow match them in that order, so keep it when editing.
func.func @tile_nested_innermost() {
  %extent = arith.constant 2 : index
  %origin = arith.constant 0 : index
  %unit = arith.constant 1 : index
  // Loop nest: outer parallel loop wrapping an inner parallel loop.
  scf.parallel (%outer0, %outer1) = (%origin, %origin) to (%extent, %extent)
      step (%unit, %unit) {
    scf.parallel (%inner0, %inner1) = (%origin, %origin) to (%extent, %extent)
        step (%unit, %unit) {
    }
  }
  // Standalone parallel loop with no nesting.
  scf.parallel (%solo0, %solo1) = (%origin, %origin) to (%extent, %extent)
      step (%unit, %unit) {
  }
  return
}
82
83// CHECK-LABEL:   func @tile_nested_innermost() {
84// CHECK:           [[C2:%.*]] = arith.constant 2 : index
85// CHECK:           [[C0:%.*]] = arith.constant 0 : index
86// CHECK:           [[C1:%.*]] = arith.constant 1 : index
87// CHECK:           scf.parallel ([[V1:%.*]], [[V2:%.*]]) = ([[C0]], [[C0]]) to ([[C2]], [[C2]]) step ([[C1]], [[C1]]) {
88// CHECK:             [[C0_1:%.*]] = arith.constant 0 : index
89// CHECK:             [[C1_1:%.*]] = arith.constant 1 : index
90// CHECK:             [[C4:%.*]] = arith.constant 4 : index
91// CHECK:             [[V3:%.*]] = arith.muli [[C1]], [[C1_1]] : index
92// CHECK:             [[V4:%.*]] = arith.muli [[C1]], [[C4]] : index
93// CHECK:             scf.parallel ([[V5:%.*]], [[V6:%.*]]) = ([[C0]], [[C0]]) to ([[C2]], [[C2]]) step ([[V3]], [[V4]]) {
94// CHECK:               [[V7:%.*]] = affine.min #{{.*}}([[V4]], [[C2]], [[V6]])
95// CHECK:               scf.parallel ([[V8:%.*]], [[V9:%.*]]) = ([[C0_1]], [[C0_1]]) to ([[V3]], [[V7]]) step ([[C1]], [[C1]]) {
96// CHECK:                 = arith.addi [[V8]], [[V5]] : index
97// CHECK:                 = arith.addi [[V9]], [[V6]] : index
98// CHECK:               }
99// CHECK:             }
100// CHECK:           }
101// CHECK:           [[C0_2:%.*]] = arith.constant 0 : index
102// CHECK:           [[C1_2:%.*]] = arith.constant 1 : index
103// CHECK:           [[C4_1:%.*]] = arith.constant 4 : index
104// CHECK:           [[V10:%.*]] = arith.muli [[C1]], [[C1_2]] : index
105// CHECK:           [[V11:%.*]] = arith.muli [[C1]], [[C4_1]] : index
106// CHECK:           scf.parallel ([[V12:%.*]], [[V13:%.*]]) = ([[C0]], [[C0]]) to ([[C2]], [[C2]]) step ([[V10]], [[V11]]) {
107// CHECK:             [[V14:%.*]] = affine.min #{{.*}}([[V11]], [[C2]], [[V13]])
108// CHECK:             scf.parallel ([[V15:%.*]], [[V16:%.*]]) = ([[C0_2]], [[C0_2]]) to ([[V10]], [[V14]]) step ([[C1]], [[C1]]) {
109// CHECK:               = arith.addi [[V15]], [[V12]] : index
110// CHECK:               = arith.addi [[V16]], [[V13]] : index
111// CHECK:             }
112// CHECK:           }
113// CHECK:           return
114// CHECK:         }
115
116// -----
117
// Input where the only parallel loop sits inside two nested sequential
// scf.for loops. Every loop runs over [0, 2) with unit step and the parallel
// loop body is empty.
// The constants are defined in the order 0, 1, 2.
func.func @tile_nested_in_non_ploop() {
  %first = arith.constant 0 : index
  %unit = arith.constant 1 : index
  %bound = arith.constant 2 : index
  scf.for %outer = %first to %bound step %unit {
    scf.for %inner = %first to %bound step %unit {
      scf.parallel (%p0, %p1) = (%first, %first) to (%bound, %bound)
          step (%unit, %unit) {
      }
    }
  }
  return
}
130
131// CHECK-LABEL: func @tile_nested_in_non_ploop
132// CHECK:         scf.for
133// CHECK:           scf.for
134// CHECK:             scf.parallel
135// CHECK:               scf.parallel
136// CHECK:               }
137// CHECK:             }
138// CHECK:           }
139// CHECK:         }
140// CHECK:       }
141