// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=1 gpu-thread-dims=1}))" %s | FileCheck --check-prefix=CHECK-11 %s
// RUN: mlir-opt -pass-pipeline="builtin.module(func.func(convert-affine-for-to-gpu{gpu-block-dims=2 gpu-thread-dims=2}))" %s | FileCheck --check-prefix=CHECK-22 %s

// Exercises the affine-loop-to-GPU conversion on a nest of four affine.for
// loops that use the default step. The file is checked twice:
//   * first invocation  (1 block dim, 1 thread dim): the checks expect the two
//     outer loops to become a gpu.launch while the two inner loops remain
//     affine.for operations inside it;
//   * second invocation (2 block dims, 2 thread dims): the checks expect the
//     bounds of all four loops to be materialized before a single gpu.launch.
// In both cases the load/store must index with the remapped induction values.

// CHECK-11-LABEL: @step_1
// CHECK-22-LABEL: @step_1
func.func @step_1(%A : memref<?x?x?x?xf32>, %B : memref<?x?x?x?xf32>) {
  // Bounds of the loop, its range and step.
  // CHECK-11-NEXT: %{{.*}} = arith.constant 0 : index
  // CHECK-11-NEXT: %{{.*}} = arith.constant 42 : index
  // CHECK-11-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  // CHECK-11-NEXT: %{{.*}} = arith.constant 1 : index
  //
  // CHECK-22-NEXT: %{{.*}} = arith.constant 0 : index
  // CHECK-22-NEXT: %{{.*}} = arith.constant 42 : index
  // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
  // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
  affine.for %i = 0 to 42 {

    // Bounds of the loop, its range and step.
    // CHECK-11-NEXT: %{{.*}} = arith.constant 0 : index
    // CHECK-11-NEXT: %{{.*}} = arith.constant 10 : index
    // CHECK-11-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
    // CHECK-11-NEXT: %{{.*}} = arith.constant 1 : index
    //
    // CHECK-22-NEXT: %{{.*}} = arith.constant 0 : index
    // CHECK-22-NEXT: %{{.*}} = arith.constant 10 : index
    // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
    // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
    affine.for %j = 0 to 10 {
      // With one block and one thread dimension, the launch is emitted once
      // the bounds of the two outer loops have been computed.
      // CHECK-11: gpu.launch
      // CHECK-11-SAME: blocks
      // CHECK-11-SAME: threads

      // Remapping of the loop induction variables.
      // CHECK-11: %[[i:.*]] = arith.addi %{{.*}}, %{{.*}} : index
      // CHECK-11-NEXT: %[[j:.*]] = arith.addi %{{.*}}, %{{.*}} : index

      // This loop is not converted if mapping to 1, 1 dimensions.
      // CHECK-11-NEXT: affine.for %[[ii:.*]] = 2 to 16
      //
      // Bounds of the loop, its range and step.
      // CHECK-22-NEXT: %{{.*}} = arith.constant 2 : index
      // CHECK-22-NEXT: %{{.*}} = arith.constant 16 : index
      // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
      // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
      affine.for %ii = 2 to 16 {
        // This loop is not converted if mapping to 1, 1 dimensions.
        // CHECK-11-NEXT: affine.for %[[jj:.*]] = 5 to 17
        //
        // Bounds of the loop, its range and step.
        // CHECK-22-NEXT: %{{.*}} = arith.constant 5 : index
        // CHECK-22-NEXT: %{{.*}} = arith.constant 17 : index
        // CHECK-22-NEXT: %{{.*}} = arith.subi %{{.*}}, %{{.*}} : index
        // CHECK-22-NEXT: %{{.*}} = arith.constant 1 : index
        affine.for %jj = 5 to 17 {
          // With two block and two thread dimensions, the launch is emitted
          // only after the bounds of all four loops have been computed.
          // CHECK-22: gpu.launch
          // CHECK-22-SAME: blocks
          // CHECK-22-SAME: threads

          // Remapping of the loop induction variables in the last mapped loop.
          // CHECK-22: %[[i:.*]] = arith.addi %{{.*}}, %{{.*}} : index
          // CHECK-22-NEXT: %[[j:.*]] = arith.addi %{{.*}}, %{{.*}} : index
          // CHECK-22-NEXT: %[[ii:.*]] = arith.addi %{{.*}}, %{{.*}} : index
          // CHECK-22-NEXT: %[[jj:.*]] = arith.addi %{{.*}}, %{{.*}} : index

          // Using remapped values instead of loop iterators.
          // CHECK-11: {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          // CHECK-22: {{.*}} = memref.load %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          %0 = memref.load %A[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>
          // CHECK-11-NEXT: memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          // CHECK-22-NEXT: memref.store {{.*}}, %{{.*}}[%[[i]], %[[j]], %[[ii]], %[[jj]]] : memref<?x?x?x?xf32>
          memref.store %0, %B[%i, %j, %ii, %jj] : memref<?x?x?x?xf32>

          // The launch region must be closed by its terminator in both modes.
          // CHECK-11: gpu.terminator
          // CHECK-22: gpu.terminator
        }
      }
    }
  }
  return
}