// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-nvvm --split-input-file %s | FileCheck --check-prefix=NVVM %s
// RUN: mlir-opt -allow-unregistered-dialect --convert-gpu-to-rocdl --split-input-file %s | FileCheck --check-prefix=ROCDL %s

// Lowering of gpu.func memory attributions to the LLVM dialect.
//
// Every test case is verified twice, once per RUN line above. The expectations
// in this file are that private attributions become an llvm.alloca inside the
// function (returning !llvm.ptr on the NVVM path and !llvm.ptr<5> on the ROCDL
// path) and that workgroup attributions become internal llvm.mlir.global
// arrays in address space 3. In both cases the buffer is then wrapped in a
// memref descriptor struct.

gpu.module @kernel {
  // NVVM-LABEL: llvm.func @private
  // ROCDL-LABEL: llvm.func @private
  gpu.func @private(%arg0: f32) private(%arg1: memref<4xf32, #gpu.address_space<private>>) {
    // Allocate private memory inside the function.
    // NVVM: %[[size:.*]] = llvm.mlir.constant(4 : i64) : i64
    // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x f32 : (i64) -> !llvm.ptr

    // ROCDL: %[[size:.*]] = llvm.mlir.constant(4 : i64) : i64
    // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x f32 : (i64) -> !llvm.ptr<5>

    // Populate the memref descriptor.
    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
    // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
    // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
    // NVVM: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2]
    // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64
    // NVVM: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0]
    // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
    // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0]

    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<5>, ptr<5>, i64, array<1 x i64>, array<1 x i64>)>
    // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
    // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
    // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
    // ROCDL: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2]
    // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64
    // ROCDL: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0]
    // ROCDL: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
    // ROCDL: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0]

    // "Store" lowering should work just as any other memref, only check that
    // we emit some core instructions. The extractvalue operand is written as a
    // use of the captured descriptor (no regex suffix) so that the check ties
    // the store to the descriptor built above instead of rebinding the
    // variable to arbitrary text.
    // NVVM: llvm.extractvalue %[[descr6]]
    // NVVM: llvm.getelementptr
    // NVVM: llvm.store

    // ROCDL: llvm.extractvalue %[[descr6]]
    // ROCDL: llvm.getelementptr
    // ROCDL: llvm.store
    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<private>>

    "terminator"() : () -> ()
  }
}

// -----

gpu.module @kernel {
  // Workgroup buffers are allocated as globals.
  // NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
  // NVVM-SAME: addr_space = 3
  // NVVM-SAME: !llvm.array<4 x f32>

  // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
  // ROCDL-SAME: addr_space = 3
  // ROCDL-SAME: !llvm.array<4 x f32>

  // NVVM-LABEL: llvm.func @workgroup
  // NVVM-SAME: {

  // ROCDL-LABEL: llvm.func @workgroup
  // ROCDL-SAME: {
  gpu.func @workgroup(%arg0: f32) workgroup(%arg1: memref<4xf32, #gpu.address_space<workgroup>>) {
    // Get the address of the first element in the global array.
    // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<3>
    // NVVM: %[[raw:.*]] = llvm.getelementptr %[[addr]][0, 0]
    // NVVM-SAME: !llvm.ptr<3>

    // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<3>
    // ROCDL: %[[raw:.*]] = llvm.getelementptr %[[addr]][0, 0]
    // ROCDL-SAME: !llvm.ptr<3>

    // Populate the memref descriptor.
    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
    // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
    // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
    // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
    // NVVM: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2]
    // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64
    // NVVM: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0]
    // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
    // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0]

    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
    // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
    // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
    // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
    // ROCDL: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2]
    // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64
    // ROCDL: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0]
    // ROCDL: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
    // ROCDL: %[[descr6:.*]] = llvm.insertvalue %[[c1]], %[[descr5]][4, 0]

    // "Store" lowering should work just as any other memref, only check that
    // we emit some core instructions. The extractvalue operand is written as a
    // use of the captured descriptor (no regex suffix) so that the check ties
    // the store to the descriptor built above instead of rebinding the
    // variable to arbitrary text.
    // NVVM: llvm.extractvalue %[[descr6]]
    // NVVM: llvm.getelementptr
    // NVVM: llvm.store

    // ROCDL: llvm.extractvalue %[[descr6]]
    // ROCDL: llvm.getelementptr
    // ROCDL: llvm.store
    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0] : memref<4xf32, #gpu.address_space<workgroup>>

    "terminator"() : () -> ()
  }
}

// -----

gpu.module @kernel {
  // Check that the total size was computed correctly (4 * 2 * 6 = 48).
  // NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
  // NVVM-SAME: addr_space = 3
  // NVVM-SAME: !llvm.array<48 x f32>

  // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
  // ROCDL-SAME: addr_space = 3
  // ROCDL-SAME: !llvm.array<48 x f32>

  // NVVM-LABEL: llvm.func @workgroup3d
  // ROCDL-LABEL: llvm.func @workgroup3d
  gpu.func @workgroup3d(%arg0: f32) workgroup(%arg1: memref<4x2x6xf32, #gpu.address_space<workgroup>>) {
    // Get the address of the first element in the global array.
    // NVVM: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<3>
    // NVVM: %[[raw:.*]] = llvm.getelementptr %[[addr]][0, 0]
    // NVVM-SAME: !llvm.ptr<3>

    // ROCDL: %[[addr:.*]] = llvm.mlir.addressof @[[$buffer]] : !llvm.ptr<3>
    // ROCDL: %[[raw:.*]] = llvm.getelementptr %[[addr]][0, 0]
    // ROCDL-SAME: !llvm.ptr<3>

    // Populate the memref descriptor. Field 3 holds the sizes (4, 2, 6) and
    // field 4 holds the strides (12, 6, 1); they are inserted interleaved,
    // one dimension at a time.
    // NVVM: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<3 x i64>, array<3 x i64>)>
    // NVVM: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
    // NVVM: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
    // NVVM: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
    // NVVM: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2]
    // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64
    // NVVM: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0]
    // NVVM: %[[c12:.*]] = llvm.mlir.constant(12 : index) : i64
    // NVVM: %[[descr6:.*]] = llvm.insertvalue %[[c12]], %[[descr5]][4, 0]
    // NVVM: %[[c2:.*]] = llvm.mlir.constant(2 : index) : i64
    // NVVM: %[[descr7:.*]] = llvm.insertvalue %[[c2]], %[[descr6]][3, 1]
    // NVVM: %[[c6:.*]] = llvm.mlir.constant(6 : index) : i64
    // NVVM: %[[descr8:.*]] = llvm.insertvalue %[[c6]], %[[descr7]][4, 1]
    // NVVM: %[[c6:.*]] = llvm.mlir.constant(6 : index) : i64
    // NVVM: %[[descr9:.*]] = llvm.insertvalue %[[c6]], %[[descr8]][3, 2]
    // NVVM: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
    // NVVM: %[[descr10:.*]] = llvm.insertvalue %[[c1]], %[[descr9]][4, 2]

    // ROCDL: %[[descr1:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<3 x i64>, array<3 x i64>)>
    // ROCDL: %[[descr2:.*]] = llvm.insertvalue %[[raw]], %[[descr1]][0]
    // ROCDL: %[[descr3:.*]] = llvm.insertvalue %[[raw]], %[[descr2]][1]
    // ROCDL: %[[c0:.*]] = llvm.mlir.constant(0 : index) : i64
    // ROCDL: %[[descr4:.*]] = llvm.insertvalue %[[c0]], %[[descr3]][2]
    // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : index) : i64
    // ROCDL: %[[descr5:.*]] = llvm.insertvalue %[[c4]], %[[descr4]][3, 0]
    // ROCDL: %[[c12:.*]] = llvm.mlir.constant(12 : index) : i64
    // ROCDL: %[[descr6:.*]] = llvm.insertvalue %[[c12]], %[[descr5]][4, 0]
    // ROCDL: %[[c2:.*]] = llvm.mlir.constant(2 : index) : i64
    // ROCDL: %[[descr7:.*]] = llvm.insertvalue %[[c2]], %[[descr6]][3, 1]
    // ROCDL: %[[c6:.*]] = llvm.mlir.constant(6 : index) : i64
    // ROCDL: %[[descr8:.*]] = llvm.insertvalue %[[c6]], %[[descr7]][4, 1]
    // ROCDL: %[[c6:.*]] = llvm.mlir.constant(6 : index) : i64
    // ROCDL: %[[descr9:.*]] = llvm.insertvalue %[[c6]], %[[descr8]][3, 2]
    // ROCDL: %[[c1:.*]] = llvm.mlir.constant(1 : index) : i64
    // ROCDL: %[[descr10:.*]] = llvm.insertvalue %[[c1]], %[[descr9]][4, 2]

    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0,%c0,%c0] : memref<4x2x6xf32, #gpu.address_space<workgroup>>
    "terminator"() : () -> ()
  }
}

// -----

gpu.module @kernel {
  // Check that several buffers are defined.
  // NVVM: llvm.mlir.global internal @[[$buffer1:.*]]()
  // NVVM-SAME: !llvm.array<1 x f32>
  // NVVM: llvm.mlir.global internal @[[$buffer2:.*]]()
  // NVVM-SAME: !llvm.array<2 x f32>

  // ROCDL: llvm.mlir.global internal @[[$buffer1:.*]]()
  // ROCDL-SAME: !llvm.array<1 x f32>
  // ROCDL: llvm.mlir.global internal @[[$buffer2:.*]]()
  // ROCDL-SAME: !llvm.array<2 x f32>

  // NVVM-LABEL: llvm.func @multiple
  // ROCDL-LABEL: llvm.func @multiple
  gpu.func @multiple(%arg0: f32)
      workgroup(%arg1: memref<1xf32, #gpu.address_space<workgroup>>, %arg2: memref<2xf32, #gpu.address_space<workgroup>>)
      private(%arg3: memref<3xf32, #gpu.address_space<private>>, %arg4: memref<4xf32, #gpu.address_space<private>>) {

    // Workgroup buffers.
    // NVVM: llvm.mlir.addressof @[[$buffer1]]
    // NVVM: llvm.mlir.addressof @[[$buffer2]]

    // ROCDL: llvm.mlir.addressof @[[$buffer1]]
    // ROCDL: llvm.mlir.addressof @[[$buffer2]]

    // Private buffers.
    // NVVM: %[[c3:.*]] = llvm.mlir.constant(3 : i64)
    // NVVM: llvm.alloca %[[c3]] x f32 : (i64) -> !llvm.ptr
    // NVVM: %[[c4:.*]] = llvm.mlir.constant(4 : i64)
    // NVVM: llvm.alloca %[[c4]] x f32 : (i64) -> !llvm.ptr

    // ROCDL: %[[c3:.*]] = llvm.mlir.constant(3 : i64)
    // ROCDL: llvm.alloca %[[c3]] x f32 : (i64) -> !llvm.ptr<5>
    // ROCDL: %[[c4:.*]] = llvm.mlir.constant(4 : i64)
    // ROCDL: llvm.alloca %[[c4]] x f32 : (i64) -> !llvm.ptr<5>

    %c0 = arith.constant 0 : index
    memref.store %arg0, %arg1[%c0] : memref<1xf32, #gpu.address_space<workgroup>>
    memref.store %arg0, %arg2[%c0] : memref<2xf32, #gpu.address_space<workgroup>>
    memref.store %arg0, %arg3[%c0] : memref<3xf32, #gpu.address_space<private>>
    memref.store %arg0, %arg4[%c0] : memref<4xf32, #gpu.address_space<private>>
    "terminator"() : () -> ()
  }
}

// -----

gpu.module @kernel {
  // Check that alignment attributes are set correctly: the workgroup
  // attribution's llvm.align goes on the global, the private attribution's
  // llvm.align goes on the alloca.
  // NVVM: llvm.mlir.global internal @[[$buffer:.*]]()
  // NVVM-SAME: addr_space = 3
  // NVVM-SAME: alignment = 8
  // NVVM-SAME: !llvm.array<48 x f32>

  // ROCDL: llvm.mlir.global internal @[[$buffer:.*]]()
  // ROCDL-SAME: addr_space = 3
  // ROCDL-SAME: alignment = 8
  // ROCDL-SAME: !llvm.array<48 x f32>

  // NVVM-LABEL: llvm.func @explicitAlign
  // ROCDL-LABEL: llvm.func @explicitAlign
  gpu.func @explicitAlign(%arg0 : index)
      workgroup(%arg1: memref<48xf32, #gpu.address_space<workgroup>> {llvm.align = 8 : i64})
      private(%arg2: memref<48xf32, #gpu.address_space<private>> {llvm.align = 4 : i64}) {
    // NVVM: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
    // NVVM: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr

    // ROCDL: %[[size:.*]] = llvm.mlir.constant(48 : i64) : i64
    // ROCDL: %[[raw:.*]] = llvm.alloca %[[size]] x f32 {alignment = 4 : i64} : (i64) -> !llvm.ptr<5>

    %val = memref.load %arg1[%arg0] : memref<48xf32, #gpu.address_space<workgroup>>
    memref.store %val, %arg2[%arg0] : memref<48xf32, #gpu.address_space<private>>
    "terminator"() : () -> ()
  }
}