// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(gpu.module(gpu.func(test-gpu-memory-promotion)))' -split-input-file %s | FileCheck %s

gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref3d
  // CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, #gpu.address_space<workgroup>>)
  gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted, the order does not matter.
    // CHECK-DAG: %[[c1:.*]] = arith.constant 1
    // CHECK-DAG: %[[c4:.*]] = arith.constant 4
    // CHECK-DAG: %[[c5:.*]] = arith.constant 5
    // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x
    // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y
    // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z
    // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x
    // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y
    // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z

    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK: %[[v:.*]] = memref.load %[[arg]][%[[i1]], %[[i2]]]
    // CHECK: store %[[v]], %[[promoted]][%[[i1]], %[[i2]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, #gpu.address_space<workgroup>>)
    "use"(%arg0) : (memref<5x4xf32>) -> ()

    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK: %[[v:.*]] = memref.load %[[promoted]][%[[i1]], %[[i2]]]
    // CHECK: store %[[v]], %[[arg]][%[[i1]], %[[i2]]]
    gpu.return
  }
}

// -----

gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref5d
  // CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, #gpu.address_space<workgroup>>)
  gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted, the order does not matter.
    // CHECK-DAG: %[[c0:.*]] = arith.constant 0
    // CHECK-DAG: %[[c1:.*]] = arith.constant 1
    // CHECK-DAG: %[[c4:.*]] = arith.constant 4
    // CHECK-DAG: %[[c5:.*]] = arith.constant 5
    // CHECK-DAG: %[[c6:.*]] = arith.constant 6
    // CHECK-DAG: %[[c7:.*]] = arith.constant 7
    // CHECK-DAG: %[[c8:.*]] = arith.constant 8
    // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x
    // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y
    // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z
    // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x
    // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y
    // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z

    // Verify that loops for the copy are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =
    // CHECK: scf.for %[[i3:.*]] =
    // CHECK: scf.for %[[i4:.*]] =

    // Verify that the copy is emitted.
    // CHECK: %[[v:.*]] = memref.load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK: store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, #gpu.address_space<workgroup>>)
    "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()

    // Verify that loops for the copy are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK: scf.for %[[i1:.*]] =
    // CHECK: scf.for %[[i2:.*]] =
    // CHECK: scf.for %[[i3:.*]] =
    // CHECK: scf.for %[[i4:.*]] =

    // Verify that the copy is emitted.
    // CHECK: %[[v:.*]] = memref.load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK: store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    gpu.return
  }
}

// -----

gpu.module @foo {

  // Check that attribution insertion works fine.
  // CHECK-LABEL: @insert
  // CHECK-SAME: (%{{.*}}: memref<4xf32>
  // CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, #gpu.address_space<workgroup>>
  // CHECK-SAME: %[[wg2:.*]] : memref<4xf32, #gpu.address_space<workgroup>>)
  // CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
  gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
      workgroup(%arg1: memref<1x1xf64, #gpu.address_space<workgroup>>)
      private(%arg2: memref<1x1xi64, 5>)
      kernel {
    // CHECK: "use"(%[[wg2]])
    "use"(%arg0) : (memref<4xf32>) -> ()
    gpu.return
  }
}