// RUN: mlir-opt %s -split-input-file | mlir-opt -split-input-file | FileCheck %s

module attributes {gpu.container_module} {

  // Round-trips the sparse matrix-vector multiplication (SpMV) ops: COO
  // matrix and dense-vector handle creation, buffer-size query, the SpMV
  // itself, and handle destruction, all chained through async tokens.
  // CHECK-LABEL: func @matvec
  // CHECK: %{{.*}} = gpu.wait async
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_coo async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}} : index into memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} into f64
  // CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
  // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
  // CHECK: gpu.wait [%{{.*}}]
  // CHECK: return
  func.func @matvec(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %spmat, %token4 = gpu.create_coo async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %dnvec, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0 : index into memref<?xf64>
    %bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %spmat, %dnvec, %dnvec into f64
    %token7 = gpu.spmv async [%token6] %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    %token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
    gpu.wait [%token9]
    return
  }

  // Round-trips the sparse matrix-matrix multiplication (SpMM) ops, using a
  // CSR sparse matrix and a two-dimensional dense tensor handle.
  // CHECK-LABEL: func @matmul
  // CHECK: %{{.*}} = gpu.wait async
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : index, index into memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.spmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} into f64
  // CHECK: %{{.*}} = gpu.spmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
  // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
  // CHECK: gpu.wait [%{{.*}}]
  // CHECK: return
  func.func @matmul(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
    %bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %spmat, %dnmat, %dnmat : index into f64
    %token7 = gpu.spmm async [%token6] %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
    gpu.wait [%token9]
    return
  }
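
  // Round-trips the two-phase SpGEMM ops: descriptor creation, work
  // estimation followed by compute, result-size query, CSR pointer rewiring
  // on the output matrix, copy of the result, and descriptor/matrix cleanup.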
  // CHECK-LABEL: func @spgemm
  // CHECK: %{{.*}} = gpu.wait async
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf32>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_create_descr async [%{{.*}}]
  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ WORK_ESTIMATION} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
  // CHECK: %{{.*}}, %{{.*}} = gpu.spgemm_work_estimation_or_compute async [%{{.*}}]{ COMPUTE} %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32 into memref<0xi8>
  // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} = gpu.spmat_get_size async [%{{.*}}] %{{.*}}
  // CHECK: %{{.*}} = gpu.set_csr_pointers async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf32>
  // CHECK: %{{.*}} = gpu.spgemm_copy async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : f32
  // CHECK: %{{.*}} = gpu.spgemm_destroy_descr async [%{{.*}}] %{{.*}}
  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
  // CHECK: gpu.wait [%{{.*}}]
  // CHECK: return
  func.func @spgemm(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
    %spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
    %alloc = memref.alloc() : memref<0xi8> // nullptr
    %c0 = arith.constant 0 : index
    %bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
                            [%token6]{WORK_ESTIMATION}
                            %spmatA, %spmatB, %spmatC,
                            %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
    %bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async
                            [%token7]{COMPUTE}
                            %spmatA, %spmatB, %spmatC,
                            %spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
    %rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
    %token10 = gpu.set_csr_pointers async [%token9] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
    %token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
    %token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
    %token13 = gpu.destroy_sp_mat async [%token12] %spmatA
    %token14 = gpu.destroy_sp_mat async [%token13] %spmatB
    %token15 = gpu.destroy_sp_mat async [%token14] %spmatC
    gpu.wait [%token15]
    return
  }
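
  // Round-trips the sampled dense-dense matrix multiplication (SDDMM) ops:
  // the two dense operands precede the CSR output matrix.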
  // CHECK-LABEL: func @sddmm
  // CHECK: %{{.*}} = gpu.wait async
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : index, index into memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.sddmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} into f64
  // CHECK: %{{.*}} = gpu.sddmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
  // CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
  // CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
  // CHECK: gpu.wait [%{{.*}}]
  // CHECK: return
  func.func @sddmm(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
    %bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %dnmat, %dnmat, %spmat into f64
    %token7 = gpu.sddmm async [%token6] %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
    %token8 = gpu.destroy_sp_mat async [%token7] %spmat
    %token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
    gpu.wait [%token9]
    return
  }

  // Round-trips creation and destruction of CSC and BSR sparse-matrix
  // handles; the destroy ops are intentionally not checked.
  // CHECK-LABEL: func @csc_and_bsr
  // CHECK: %{{.*}} = gpu.wait async
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xindex>
  // CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_csc async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
  // CHECK: %{{.*}}, %{{.*}} = gpu.create_bsr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
  // CHECK: gpu.wait [%{{.*}}]
  // CHECK: return
  func.func @csc_and_bsr(%arg0: index) {
    %token0 = gpu.wait async
    %mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
    %mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
    %csc, %token3 = gpu.create_csc async [%token2]
        %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
        : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %bsr, %token4 = gpu.create_bsr async [%token3]
        %arg0, %arg0, %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
        : memref<?xindex>, memref<?xindex>, memref<?xf64>
    %token5 = gpu.destroy_sp_mat async [%token4] %csc
    %token6 = gpu.destroy_sp_mat async [%token5] %bsr
    gpu.wait [%token6]
    return
  }

}