// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(scf-for-loop-range-folding))' -split-input-file | FileCheck %s

// A single loop whose induction variable is only used by an addi feeding a
// muli, both with loop-invariant second operands. The checks below expect the
// add and the multiply to be applied to the loop bounds (and the multiply to
// the step), with the loop body indexing directly by the new induction
// variable.
func.func @fold_one_loop(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  scf.for %i = %c0 to %arg1 step %c1 {
    %0 = arith.addi %arg2, %i : index
    %1 = arith.muli %0, %c4 : index
    %2 = memref.load %arg0[%1] : memref<?xi32>
    %3 = arith.muli %2, %2 : i32
    memref.store %3, %arg0[%1] : memref<?xi32>
  }
  return
}

// The trailing '(' keeps this label from also matching @fold_one_loop2.
// CHECK-LABEL: func @fold_one_loop(
// CHECK-SAME:   %[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}
// CHECK:       %[[C0:.*]] = arith.constant 0 : index
// CHECK:       %[[C1:.*]] = arith.constant 1 : index
// CHECK:       %[[C4:.*]] = arith.constant 4 : index
// CHECK:       %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
// CHECK:       %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
// CHECK:       %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
// CHECK:       %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
// CHECK:       %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
// CHECK:       scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
// CHECK:         %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
// CHECK:         %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
// CHECK:         memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]

// -----

// The same inner loop as above, nested in an outer loop whose induction
// variable is unused. The checks expect the outer loop bounds to stay as
// written and the folded bound computations for the inner loop to appear
// inside the outer loop body.
func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c10 = arith.constant 10 : index
  scf.for %j = %c0 to %c10 step %c1 {
    scf.for %i = %c0 to %arg1 step %c1 {
      %0 = arith.addi %arg2, %i : index
      %1 = arith.muli %0, %c4 : index
      %2 = memref.load %arg0[%1] : memref<?xi32>
      %3 = arith.muli %2, %2 : i32
      memref.store %3, %arg0[%1] : memref<?xi32>
    }
  }
  return
}

// CHECK-LABEL: func @fold_one_loop2(
// CHECK-SAME:   %[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}
// CHECK:       %[[C0:.*]] = arith.constant 0 : index
// CHECK:       %[[C1:.*]] = arith.constant 1 : index
// CHECK:       %[[C4:.*]] = arith.constant 4 : index
// CHECK:       %[[C10:.*]] = arith.constant 10 : index
// CHECK:       scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
// CHECK:         %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
// CHECK:         %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
// CHECK:         %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
// CHECK:         %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
// CHECK:         %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
// CHECK:         scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
// CHECK:           %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
// CHECK:           %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
// CHECK:           memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]

// -----

// Here the inner loop's lower bound is the outer induction variable %j. The
// checks expect the add/multiply to be folded into the outer loop's bounds and
// step as well, with the inner loop starting directly at the outer induction
// variable.
func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c10 = arith.constant 10 : index
  scf.for %j = %c0 to %c10 step %c1 {
    scf.for %i = %j to %arg1 step %c1 {
      %0 = arith.addi %arg2, %i : index
      %1 = arith.muli %0, %c4 : index
      %2 = memref.load %arg0[%1] : memref<?xi32>
      %3 = arith.muli %2, %2 : i32
      memref.store %3, %arg0[%1] : memref<?xi32>
    }
  }
  return
}

// CHECK-LABEL: func @fold_two_loops(
// CHECK-SAME:   %[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}
// CHECK:       %[[C0:.*]] = arith.constant 0 : index
// CHECK:       %[[C1:.*]] = arith.constant 1 : index
// CHECK:       %[[C4:.*]] = arith.constant 4 : index
// CHECK:       %[[C10:.*]] = arith.constant 10 : index
// CHECK:       %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
// CHECK:       %[[I1:.*]] = arith.addi %[[ARG2]], %[[C10]] : index
// CHECK:       %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
// CHECK:       %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
// CHECK:       %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
// CHECK:       scf.for %[[J:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
// CHECK:         %[[I5:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
// CHECK:         %[[I6:.*]] = arith.muli %[[I5]], %[[C4]] : index
// CHECK:         %[[I7:.*]] = arith.muli %[[C1]], %[[C4]] : index
// CHECK:         scf.for %[[I:.*]] = %[[J]] to %[[I6]] step %[[I7]] {
// CHECK:           %[[I8:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
// CHECK:           %[[I9:.*]] = arith.muli %[[I8]], %[[I8]] : i32
// CHECK:           memref.store %[[I9]], %[[ARG0]]{{\[}}%[[I]]

// -----

// If an instruction's operands are not defined outside the loop, we cannot
// perform the optimization, as is the case with the arith.muli below. (If
// paired with loop invariant code motion we can continue.)
func.func @fold_only_first_add(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  scf.for %i = %c0 to %arg1 step %c1 {
    %0 = arith.addi %arg2, %i : index
    %1 = arith.addi %arg2, %c4 : index
    %2 = arith.muli %0, %1 : index
    %3 = memref.load %arg0[%2] : memref<?xi32>
    %4 = arith.muli %3, %3 : i32
    memref.store %4, %arg0[%2] : memref<?xi32>
  }
  return
}

// CHECK-LABEL: func @fold_only_first_add(
// CHECK-SAME:   %[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}
// CHECK:       %[[C0:.*]] = arith.constant 0 : index
// CHECK:       %[[C1:.*]] = arith.constant 1 : index
// CHECK:       %[[C4:.*]] = arith.constant 4 : index
// CHECK:       %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
// CHECK:       %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
// CHECK:       scf.for %[[I:.*]] = %[[I0]] to %[[I1]] step %[[C1]] {
// CHECK:         %[[I2:.*]] = arith.addi %[[ARG2]], %[[C4]] : index
// CHECK:         %[[I3:.*]] = arith.muli %[[I]], %[[I2]] : index
// CHECK:         %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I3]]
// CHECK:         %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
// CHECK:         memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I3]]