1// RUN: mlir-opt -split-input-file %s | FileCheck %s 2// Verify the printed output can be parsed. 3// RUN: mlir-opt -split-input-file %s | mlir-opt -split-input-file | FileCheck %s 4// Verify the generic form can be parsed. 5// RUN: mlir-opt -split-input-file -mlir-print-op-generic %s | mlir-opt -split-input-file | FileCheck %s 6 7func.func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x10xf32>) -> memref<10x10xf32> { 8 %c0 = arith.constant 0 : index 9 %c10 = arith.constant 10 : index 10 %c1 = arith.constant 1 : index 11 %async = arith.constant 1 : i64 12 13 acc.parallel async(%async: i64) { 14 acc.loop gang vector control(%arg3 : index, %arg4 : index, %arg5 : index) = (%c0, %c0, %c0 : index, index, index) to (%c10, %c10, %c10 : index, index, index) step (%c1, %c1, %c1 : index, index, index) { 15 %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> 16 %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> 17 %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> 18 %p = arith.mulf %a, %b : f32 19 %co = arith.addf %cij, %p : f32 20 memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> 21 acc.yield 22 } attributes { collapse = [3], collapseDeviceType = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true, true>} 23 acc.yield 24 } 25 26 return %C : memref<10x10xf32> 27} 28 29// CHECK-LABEL: func @compute1 30// CHECK-NEXT: %{{.*}} = arith.constant 0 : index 31// CHECK-NEXT: %{{.*}} = arith.constant 10 : index 32// CHECK-NEXT: %{{.*}} = arith.constant 1 : index 33// CHECK-NEXT: [[ASYNC:%.*]] = arith.constant 1 : i64 34// CHECK-NEXT: acc.parallel async([[ASYNC]] : i64) { 35// CHECK-NEXT: acc.loop gang vector control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { 36// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 37// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 38// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 39// 
CHECK-NEXT: %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32 40// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 41// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 42// CHECK-NEXT: acc.yield 43// CHECK-NEXT: } attributes {collapse = [3], collapseDeviceType = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true, true, true>} 44// CHECK-NEXT: acc.yield 45// CHECK-NEXT: } 46// CHECK-NEXT: return %{{.*}} : memref<10x10xf32> 47 48 49// ----- 50 51func.func @compute2(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x10xf32>) -> memref<10x10xf32> { 52 %c0 = arith.constant 0 : index 53 %c10 = arith.constant 10 : index 54 %c1 = arith.constant 1 : index 55 56 acc.parallel { 57 acc.loop control(%arg3 : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 58 scf.for %arg4 = %c0 to %c10 step %c1 { 59 scf.for %arg5 = %c0 to %c10 step %c1 { 60 %a = memref.load %A[%arg3, %arg5] : memref<10x10xf32> 61 %b = memref.load %B[%arg5, %arg4] : memref<10x10xf32> 62 %cij = memref.load %C[%arg3, %arg4] : memref<10x10xf32> 63 %p = arith.mulf %a, %b : f32 64 %co = arith.addf %cij, %p : f32 65 memref.store %co, %C[%arg3, %arg4] : memref<10x10xf32> 66 } 67 } 68 acc.yield 69 } attributes {seq = [#acc.device_type<none>], inclusiveUpperbound = array<i1: true>} 70 acc.yield 71 } 72 73 return %C : memref<10x10xf32> 74} 75 76// CHECK-LABEL: func @compute2( 77// CHECK-NEXT: %{{.*}} = arith.constant 0 : index 78// CHECK-NEXT: %{{.*}} = arith.constant 10 : index 79// CHECK-NEXT: %{{.*}} = arith.constant 1 : index 80// CHECK-NEXT: acc.parallel { 81// CHECK-NEXT: acc.loop control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) 82// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { 83// CHECK-NEXT: scf.for %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} { 84// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 85// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 86// 
CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 87// CHECK-NEXT: %{{.*}} = arith.mulf %{{.*}}, %{{.*}} : f32 88// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 89// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 90// CHECK-NEXT: } 91// CHECK-NEXT: } 92// CHECK-NEXT: acc.yield 93// CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<none>]} 94// CHECK-NEXT: acc.yield 95// CHECK-NEXT: } 96// CHECK-NEXT: return %{{.*}} : memref<10x10xf32> 97// CHECK-NEXT: } 98 99// ----- 100 101acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { 102^bb0(%arg0: memref<10xf32>): 103 %0 = memref.alloc() : memref<10xf32> 104 acc.yield %0 : memref<10xf32> 105} destroy { 106^bb0(%arg0: memref<10xf32>): 107 memref.dealloc %arg0 : memref<10xf32> 108 acc.terminator 109} 110 111func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10xf32>, %d: memref<10xf32>) -> memref<10xf32> { 112 %lb = arith.constant 0 : index 113 %st = arith.constant 1 : index 114 %c10 = arith.constant 10 : index 115 %numGangs = arith.constant 10 : i64 116 %numWorkers = arith.constant 10 : i64 117 118 %pa = acc.present varPtr(%a : memref<10x10xf32>) varType(tensor<10x10xf32>) -> memref<10x10xf32> 119 %pb = acc.present varPtr(%b : memref<10x10xf32>) varType(tensor<10x10xf32>) -> memref<10x10xf32> 120 %pc = acc.present varPtr(%c : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 121 %pd = acc.present varPtr(%d : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 122 acc.data dataOperands(%pa, %pb, %pc, %pd: memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) { 123 %private = acc.private varPtr(%c : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 124 acc.parallel num_gangs({%numGangs: i64}) num_workers(%numWorkers: i64 [#acc.device_type<nvidia>]) private(@privatization_memref_10_f32 -> %private : memref<10xf32>) { 125 acc.loop gang 
control(%x : index) = (%lb : index) to (%c10 : index) step (%st : index) { 126 acc.loop worker control(%y : index) = (%lb : index) to (%c10 : index) step (%st : index) { 127 %axy = memref.load %a[%x, %y] : memref<10x10xf32> 128 %bxy = memref.load %b[%x, %y] : memref<10x10xf32> 129 %tmp = arith.addf %axy, %bxy : f32 130 memref.store %tmp, %c[%y] : memref<10xf32> 131 acc.yield 132 } attributes {inclusiveUpperbound = array<i1: true>} 133 134 acc.loop control(%i : index) = (%lb : index) to (%c10 : index) step (%st : index) { 135 // for i = 0 to 10 step 1 136 // d[x] += c[i] 137 %ci = memref.load %c[%i] : memref<10xf32> 138 %dx = memref.load %d[%x] : memref<10xf32> 139 %z = arith.addf %ci, %dx : f32 140 memref.store %z, %d[%x] : memref<10xf32> 141 acc.yield 142 } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<nvidia>]} 143 acc.yield 144 } attributes {inclusiveUpperbound = array<i1: true>} 145 acc.yield 146 } 147 acc.terminator 148 } 149 150 return %d : memref<10xf32> 151} 152 153// CHECK: func @compute3({{.*}}: memref<10x10xf32>, {{.*}}: memref<10x10xf32>, [[ARG2:%.*]]: memref<10xf32>, {{.*}}: memref<10xf32>) -> memref<10xf32> { 154// CHECK-NEXT: [[C0:%.*]] = arith.constant 0 : index 155// CHECK-NEXT: [[C1:%.*]] = arith.constant 1 : index 156// CHECK-NEXT: [[C10:%.*]] = arith.constant 10 : index 157// CHECK-NEXT: [[NUMGANG:%.*]] = arith.constant 10 : i64 158// CHECK-NEXT: [[NUMWORKERS:%.*]] = arith.constant 10 : i64 159// CHECK: acc.data dataOperands(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>) { 160// CHECK-NEXT: %[[P_ARG2:.*]] = acc.private varPtr([[ARG2]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 161// CHECK-NEXT: acc.parallel num_gangs({[[NUMGANG]] : i64}) num_workers([[NUMWORKERS]] : i64 [#acc.device_type<nvidia>]) private(@privatization_memref_10_f32 -> %[[P_ARG2]] : memref<10xf32>) { 162// CHECK-NEXT: acc.loop gang control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) 
step (%{{.*}}) { 163// CHECK-NEXT: acc.loop worker control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { 164// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 165// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}, %{{.*}}] : memref<10x10xf32> 166// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 167// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> 168// CHECK-NEXT: acc.yield 169// CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>} 170// CHECK-NEXT: acc.loop control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { 171// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32> 172// CHECK-NEXT: %{{.*}} = memref.load %{{.*}}[%{{.*}}] : memref<10xf32> 173// CHECK-NEXT: %{{.*}} = arith.addf %{{.*}}, %{{.*}} : f32 174// CHECK-NEXT: memref.store %{{.*}}, %{{.*}}[%{{.*}}] : memref<10xf32> 175// CHECK-NEXT: acc.yield 176// CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>, seq = [#acc.device_type<nvidia>]} 177// CHECK-NEXT: acc.yield 178// CHECK-NEXT: } attributes {inclusiveUpperbound = array<i1: true>} 179// CHECK-NEXT: acc.yield 180// CHECK-NEXT: } 181// CHECK-NEXT: acc.terminator 182// CHECK-NEXT: } 183// CHECK-NEXT: return %{{.*}} : memref<10xf32> 184// CHECK-NEXT: } 185 186// ----- 187 188func.func @testloopop(%a : memref<10xf32>) -> () { 189 %i64Value = arith.constant 1 : i64 190 %i32Value = arith.constant 128 : i32 191 %idxValue = arith.constant 8 : index 192 %c0 = arith.constant 0 : index 193 %c10 = arith.constant 10 : index 194 %c1 = arith.constant 1 : index 195 196 acc.loop gang vector worker control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 197 "test.openacc_dummy_op"() : () -> () 198 acc.yield 199 } attributes {inclusiveUpperbound = array<i1: true>} 200 acc.loop gang({num=%i64Value: i64}) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 201 "test.openacc_dummy_op"() : () -> () 202 acc.yield 203 } 
attributes {inclusiveUpperbound = array<i1: true>} 204 acc.loop gang({static=%i64Value: i64}) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 205 "test.openacc_dummy_op"() : () -> () 206 acc.yield 207 } attributes {inclusiveUpperbound = array<i1: true>} 208 acc.loop worker(%i64Value: i64) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 209 "test.openacc_dummy_op"() : () -> () 210 acc.yield 211 } attributes {inclusiveUpperbound = array<i1: true>} 212 acc.loop worker(%i32Value: i32) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 213 "test.openacc_dummy_op"() : () -> () 214 acc.yield 215 } attributes {inclusiveUpperbound = array<i1: true>} 216 acc.loop worker(%idxValue: index) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 217 "test.openacc_dummy_op"() : () -> () 218 acc.yield 219 } attributes {inclusiveUpperbound = array<i1: true>} 220 acc.loop vector(%i64Value: i64) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 221 "test.openacc_dummy_op"() : () -> () 222 acc.yield 223 } attributes {inclusiveUpperbound = array<i1: true>} 224 acc.loop vector(%i32Value: i32) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 225 "test.openacc_dummy_op"() : () -> () 226 acc.yield 227 } attributes {inclusiveUpperbound = array<i1: true>} 228 acc.loop vector(%idxValue: index) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 229 "test.openacc_dummy_op"() : () -> () 230 acc.yield 231 } attributes {inclusiveUpperbound = array<i1: true>} 232 acc.loop gang({num=%i64Value: i64}) worker vector control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 233 "test.openacc_dummy_op"() : () -> () 234 acc.yield 235 } attributes {inclusiveUpperbound = array<i1: true>} 236 acc.loop gang({num=%i64Value: i64, static=%i64Value: i64}) worker(%i64Value: i64) vector(%i64Value: i64) control(%iv : 
index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 237 "test.openacc_dummy_op"() : () -> () 238 acc.yield 239 } attributes {inclusiveUpperbound = array<i1: true>} 240 acc.loop gang({num=%i32Value: i32, static=%idxValue: index}) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 241 "test.openacc_dummy_op"() : () -> () 242 acc.yield 243 } attributes {inclusiveUpperbound = array<i1: true>} 244 acc.loop tile({%i64Value : i64, %i64Value : i64}) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 245 "test.openacc_dummy_op"() : () -> () 246 acc.yield 247 } attributes {inclusiveUpperbound = array<i1: true>} 248 acc.loop tile({%i32Value : i32, %i32Value : i32}) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 249 "test.openacc_dummy_op"() : () -> () 250 acc.yield 251 } attributes {inclusiveUpperbound = array<i1: true>} 252 acc.loop gang({static=%i64Value: i64, num=%i64Value: i64}) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 253 "test.openacc_dummy_op"() : () -> () 254 acc.yield 255 } attributes {inclusiveUpperbound = array<i1: true>} 256 acc.loop gang({dim=%i64Value : i64, static=%i64Value: i64}) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 257 "test.openacc_dummy_op"() : () -> () 258 acc.yield 259 } attributes {inclusiveUpperbound = array<i1: true>} 260 %b = acc.cache varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 261 acc.loop cache(%b : memref<10xf32>) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 262 "test.openacc_dummy_op"() : () -> () 263 acc.yield 264 } attributes {inclusiveUpperbound = array<i1: true>} 265 return 266} 267 268// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64 269// CHECK-NEXT: [[I32VALUE:%.*]] = arith.constant 128 : i32 270// CHECK-NEXT: [[IDXVALUE:%.*]] = arith.constant 8 : index 271// CHECK: acc.loop 272// CHECK-NEXT: "test.openacc_dummy_op"() : 
() -> () 273// CHECK-NEXT: acc.yield 274// CHECK-NEXT: attributes {inclusiveUpperbound = array<i1: true>} 275// CHECK: acc.loop gang({num=[[I64VALUE]] : i64}) 276// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 277// CHECK-NEXT: acc.yield 278// CHECK: acc.loop gang({static=[[I64VALUE]] : i64}) 279// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 280// CHECK-NEXT: acc.yield 281// CHECK: acc.loop worker([[I64VALUE]] : i64) 282// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 283// CHECK-NEXT: acc.yield 284// CHECK: acc.loop worker([[I32VALUE]] : i32) 285// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 286// CHECK-NEXT: acc.yield 287// CHECK: acc.loop worker([[IDXVALUE]] : index) 288// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 289// CHECK-NEXT: acc.yield 290// CHECK: acc.loop vector([[I64VALUE]] : i64) 291// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 292// CHECK-NEXT: acc.yield 293// CHECK: acc.loop vector([[I32VALUE]] : i32) 294// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 295// CHECK-NEXT: acc.yield 296// CHECK: acc.loop vector([[IDXVALUE]] : index) 297// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 298// CHECK-NEXT: acc.yield 299// CHECK: acc.loop gang({num=[[I64VALUE]] : i64}) worker vector 300// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 301// CHECK-NEXT: acc.yield 302// CHECK: acc.loop gang({num=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64}) worker([[I64VALUE]] : i64) vector([[I64VALUE]] : i64) 303// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 304// CHECK-NEXT: acc.yield 305// CHECK: acc.loop gang({num=[[I32VALUE]] : i32, static=[[IDXVALUE]] : index}) 306// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 307// CHECK-NEXT: acc.yield 308// CHECK: acc.loop tile({[[I64VALUE]] : i64, [[I64VALUE]] : i64}) 309// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 310// CHECK-NEXT: acc.yield 311// CHECK: acc.loop tile({[[I32VALUE]] : i32, [[I32VALUE]] : i32}) 312// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 313// 
CHECK-NEXT: acc.yield 314// CHECK: acc.loop gang({static=[[I64VALUE]] : i64, num=[[I64VALUE]] : i64}) 315// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 316// CHECK-NEXT: acc.yield 317// CHECK: acc.loop gang({dim=[[I64VALUE]] : i64, static=[[I64VALUE]] : i64}) 318// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 319// CHECK-NEXT: acc.yield 320// CHECK: %{{.*}} = acc.cache varPtr(%{{.*}} : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 321// CHECK-NEXT: acc.loop cache(%{{.*}} : memref<10xf32>) 322// CHECK-NEXT: "test.openacc_dummy_op"() : () -> () 323// CHECK-NEXT: acc.yield 324 325// ----- 326 327func.func @acc_loop_multiple_block() { 328 %c0 = arith.constant 0 : index 329 %c10 = arith.constant 10 : index 330 %c1 = arith.constant 1 : index 331 acc.parallel { 332 acc.loop control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) { 333 %c1_1 = arith.constant 1 : index 334 cf.br ^bb1(%c1_1 : index) 335 ^bb1(%9: index): 336 %c0_1 = arith.constant 0 : index 337 %12 = arith.cmpi sgt, %9, %c0_1 : index 338 cf.cond_br %12, ^bb2, ^bb3 339 ^bb2: 340 %c1_0 = arith.constant 1 : index 341 %c10_1 = arith.constant 10 : index 342 %22 = arith.subi %c10_1, %c1_0 : index 343 cf.br ^bb1(%22 : index) 344 ^bb3: 345 acc.yield 346 } attributes {inclusiveUpperbound = array<i1: true>} 347 acc.yield 348 } 349 return 350} 351 352// CHECK-LABEL: func.func @acc_loop_multiple_block() 353// CHECK: acc.parallel 354// CHECK: acc.loop 355// CHECK-COUNT-3: ^bb{{.*}} 356// CHECK: acc.yield 357// CHECK: acc.yield 358 359// ----- 360 361acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { 362^bb0(%arg0: memref<10xf32>): 363 %0 = memref.alloc() : memref<10xf32> 364 acc.yield %0 : memref<10xf32> 365} destroy { 366^bb0(%arg0: memref<10xf32>): 367 memref.dealloc %arg0 : memref<10xf32> 368 acc.terminator 369} 370 371acc.private.recipe @privatization_memref_10_10_f32 : memref<10x10xf32> init { 372^bb0(%arg0: memref<10x10xf32>): 373 %0 = memref.alloc() : 
memref<10x10xf32> 374 acc.yield %0 : memref<10x10xf32> 375} destroy { 376^bb0(%arg0: memref<10x10xf32>): 377 memref.dealloc %arg0 : memref<10x10xf32> 378 acc.terminator 379} 380 381acc.firstprivate.recipe @privatization_memref_10xf32 : memref<10xf32> init { 382^bb0(%arg0: memref<10xf32>): 383 %0 = memref.alloc() : memref<10xf32> 384 acc.yield %0 : memref<10xf32> 385} copy { 386^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>): 387 acc.terminator 388} destroy { 389^bb0(%arg0: memref<10xf32>): 390 memref.dealloc %arg0 : memref<10xf32> 391 acc.terminator 392} 393 394func.func @testparallelop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { 395 %i64value = arith.constant 1 : i64 396 %i32value = arith.constant 1 : i32 397 %idxValue = arith.constant 1 : index 398 acc.parallel async(%i64value: i64) { 399 } 400 acc.parallel async(%i32value: i32) { 401 } 402 acc.parallel async(%idxValue: index) { 403 } 404 acc.parallel wait({%i64value: i64}) { 405 } 406 acc.parallel wait({%i32value: i32}) { 407 } 408 acc.parallel wait({%idxValue: index}) { 409 } 410 acc.parallel wait({%i64value : i64, %i32value : i32, %idxValue : index}) { 411 } 412 acc.parallel num_gangs({%i64value: i64}) { 413 } 414 acc.parallel num_gangs({%i32value: i32}) { 415 } 416 acc.parallel num_gangs({%idxValue: index}) { 417 } 418 acc.parallel num_gangs({%i64value: i64, %i64value: i64, %idxValue: index}) { 419 } 420 acc.parallel num_workers(%i64value: i64 [#acc.device_type<nvidia>]) { 421 } 422 acc.parallel num_workers(%i32value: i32 [#acc.device_type<default>]) { 423 } 424 acc.parallel num_workers(%idxValue: index) { 425 } 426 acc.parallel vector_length(%i64value: i64) { 427 } 428 acc.parallel vector_length(%i32value: i32) { 429 } 430 acc.parallel vector_length(%idxValue: index) { 431 } 432 acc.parallel private(@privatization_memref_10_f32 -> %a : memref<10xf32>, @privatization_memref_10_10_f32 -> %c : memref<10x10xf32>) firstprivate(@privatization_memref_10xf32 -> %b: memref<10xf32>) { 433 
} 434 acc.parallel { 435 } attributes {defaultAttr = #acc<defaultvalue none>} 436 acc.parallel { 437 } attributes {defaultAttr = #acc<defaultvalue present>} 438 acc.parallel { 439 } attributes {asyncAttr} 440 acc.parallel { 441 } attributes {waitAttr} 442 acc.parallel { 443 } attributes {selfAttr} 444 return 445} 446 447// CHECK: func @testparallelop([[ARGA:%.*]]: memref<10xf32>, [[ARGB:%.*]]: memref<10xf32>, [[ARGC:%.*]]: memref<10x10xf32>) { 448// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64 449// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32 450// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index 451// CHECK: acc.parallel async([[I64VALUE]] : i64) { 452// CHECK-NEXT: } 453// CHECK: acc.parallel async([[I32VALUE]] : i32) { 454// CHECK-NEXT: } 455// CHECK: acc.parallel async([[IDXVALUE]] : index) { 456// CHECK-NEXT: } 457// CHECK: acc.parallel wait({[[I64VALUE]] : i64}) { 458// CHECK-NEXT: } 459// CHECK: acc.parallel wait({[[I32VALUE]] : i32}) { 460// CHECK-NEXT: } 461// CHECK: acc.parallel wait({[[IDXVALUE]] : index}) { 462// CHECK-NEXT: } 463// CHECK: acc.parallel wait({[[I64VALUE]] : i64, [[I32VALUE]] : i32, [[IDXVALUE]] : index}) { 464// CHECK-NEXT: } 465// CHECK: acc.parallel num_gangs({[[I64VALUE]] : i64}) { 466// CHECK-NEXT: } 467// CHECK: acc.parallel num_gangs({[[I32VALUE]] : i32}) { 468// CHECK-NEXT: } 469// CHECK: acc.parallel num_gangs({[[IDXVALUE]] : index}) { 470// CHECK-NEXT: } 471// CHECK: acc.parallel num_gangs({[[I64VALUE]] : i64, [[I64VALUE]] : i64, [[IDXVALUE]] : index}) { 472// CHECK-NEXT: } 473// CHECK: acc.parallel num_workers([[I64VALUE]] : i64 [#acc.device_type<nvidia>]) { 474// CHECK-NEXT: } 475// CHECK: acc.parallel num_workers([[I32VALUE]] : i32 [#acc.device_type<default>]) { 476// CHECK-NEXT: } 477// CHECK: acc.parallel num_workers([[IDXVALUE]] : index) { 478// CHECK-NEXT: } 479// CHECK: acc.parallel vector_length([[I64VALUE]] : i64) { 480// CHECK-NEXT: } 481// CHECK: acc.parallel vector_length([[I32VALUE]] : i32) { 482// 
CHECK-NEXT: } 483// CHECK: acc.parallel vector_length([[IDXVALUE]] : index) { 484// CHECK-NEXT: } 485// CHECK: acc.parallel firstprivate(@privatization_memref_10xf32 -> [[ARGB]] : memref<10xf32>) private(@privatization_memref_10_f32 -> [[ARGA]] : memref<10xf32>, @privatization_memref_10_10_f32 -> [[ARGC]] : memref<10x10xf32>) { 486// CHECK-NEXT: } 487// CHECK: acc.parallel { 488// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>} 489// CHECK: acc.parallel { 490// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue present>} 491// CHECK: acc.parallel { 492// CHECK-NEXT: } attributes {asyncAttr} 493// CHECK: acc.parallel { 494// CHECK-NEXT: } attributes {waitAttr} 495// CHECK: acc.parallel { 496// CHECK-NEXT: } attributes {selfAttr} 497 498// ----- 499 500acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init { 501^bb0(%arg0: memref<10xf32>): 502 %0 = memref.alloc() : memref<10xf32> 503 acc.yield %0 : memref<10xf32> 504} destroy { 505^bb0(%arg0: memref<10xf32>): 506 memref.dealloc %arg0 : memref<10xf32> 507 acc.terminator 508} 509 510acc.private.recipe @privatization_memref_10_10_f32 : memref<10x10xf32> init { 511^bb0(%arg0: memref<10x10xf32>): 512 %0 = memref.alloc() : memref<10x10xf32> 513 acc.yield %0 : memref<10x10xf32> 514} destroy { 515^bb0(%arg0: memref<10x10xf32>): 516 memref.dealloc %arg0 : memref<10x10xf32> 517 acc.terminator 518} 519 520// Test optional destroy region 521acc.firstprivate.recipe @firstprivatization_memref_20xf32 : memref<20xf32> init { 522^bb0(%arg0: memref<20xf32>): 523 %0 = memref.alloc() : memref<20xf32> 524 acc.yield %0 : memref<20xf32> 525} copy { 526^bb0(%arg0: memref<20xf32>, %arg1: memref<20xf32>): 527 acc.terminator 528} 529 530// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_memref_20xf32 : memref<20xf32> init 531 532acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init { 533^bb0(%arg0: memref<10xf32>): 534 %0 = memref.alloc() : memref<10xf32> 535 acc.yield 
%0 : memref<10xf32> 536} copy { 537^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>): 538 acc.terminator 539} destroy { 540^bb0(%arg0: memref<10xf32>): 541 memref.dealloc %arg0 : memref<10xf32> 542 acc.terminator 543} 544 545func.func @testserialop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { 546 %i64value = arith.constant 1 : i64 547 %i32value = arith.constant 1 : i32 548 %idxValue = arith.constant 1 : index 549 acc.serial async(%i64value: i64) { 550 } 551 acc.serial async(%i32value: i32) { 552 } 553 acc.serial async(%idxValue: index) { 554 } 555 acc.serial wait({%i64value: i64}) { 556 } 557 acc.serial wait({%i32value: i32}) { 558 } 559 acc.serial wait({%idxValue: index}) { 560 } 561 acc.serial wait({%i64value : i64, %i32value : i32, %idxValue : index}) { 562 } 563 %firstprivate = acc.firstprivate varPtr(%b : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 564 acc.serial private(@privatization_memref_10_f32 -> %a : memref<10xf32>, @privatization_memref_10_10_f32 -> %c : memref<10x10xf32>) firstprivate(@firstprivatization_memref_10xf32 -> %firstprivate : memref<10xf32>) { 565 } 566 acc.serial { 567 } attributes {defaultAttr = #acc<defaultvalue none>} 568 acc.serial { 569 } attributes {defaultAttr = #acc<defaultvalue present>} 570 acc.serial { 571 } attributes {asyncAttr} 572 acc.serial { 573 } attributes {waitAttr} 574 acc.serial { 575 } attributes {selfAttr} 576 acc.serial { 577 acc.yield 578 } attributes {selfAttr} 579 return 580} 581 582// CHECK: func @testserialop([[ARGA:%.*]]: memref<10xf32>, [[ARGB:%.*]]: memref<10xf32>, [[ARGC:%.*]]: memref<10x10xf32>) { 583// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64 584// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32 585// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index 586// CHECK: acc.serial async([[I64VALUE]] : i64) { 587// CHECK-NEXT: } 588// CHECK: acc.serial async([[I32VALUE]] : i32) { 589// CHECK-NEXT: } 590// CHECK: acc.serial async([[IDXVALUE]] : index) { 591// 
CHECK-NEXT: } 592// CHECK: acc.serial wait({[[I64VALUE]] : i64}) { 593// CHECK-NEXT: } 594// CHECK: acc.serial wait({[[I32VALUE]] : i32}) { 595// CHECK-NEXT: } 596// CHECK: acc.serial wait({[[IDXVALUE]] : index}) { 597// CHECK-NEXT: } 598// CHECK: acc.serial wait({[[I64VALUE]] : i64, [[I32VALUE]] : i32, [[IDXVALUE]] : index}) { 599// CHECK-NEXT: } 600// CHECK: %[[FIRSTP:.*]] = acc.firstprivate varPtr([[ARGB]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> 601// CHECK: acc.serial firstprivate(@firstprivatization_memref_10xf32 -> %[[FIRSTP]] : memref<10xf32>) private(@privatization_memref_10_f32 -> [[ARGA]] : memref<10xf32>, @privatization_memref_10_10_f32 -> [[ARGC]] : memref<10x10xf32>) { 602// CHECK-NEXT: } 603// CHECK: acc.serial { 604// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>} 605// CHECK: acc.serial { 606// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue present>} 607// CHECK: acc.serial { 608// CHECK-NEXT: } attributes {asyncAttr} 609// CHECK: acc.serial { 610// CHECK-NEXT: } attributes {waitAttr} 611// CHECK: acc.serial { 612// CHECK-NEXT: } attributes {selfAttr} 613// CHECK: acc.serial { 614// CHECK: acc.yield 615// CHECK-NEXT: } attributes {selfAttr} 616 617// ----- 618 619 620func.func @testkernelsop(%a: memref<10xf32>, %b: memref<10xf32>, %c: memref<10x10xf32>) -> () { 621 %i64value = arith.constant 1 : i64 622 %i32value = arith.constant 1 : i32 623 %idxValue = arith.constant 1 : index 624 acc.kernels async(%i64value: i64) { 625 } 626 acc.kernels async(%i32value: i32) { 627 } 628 acc.kernels async(%idxValue: index) { 629 } 630 acc.kernels wait({%i64value: i64}) { 631 } 632 acc.kernels wait({%i32value: i32}) { 633 } 634 acc.kernels wait({%idxValue: index}) { 635 } 636 acc.kernels wait({%i64value : i64, %i32value : i32, %idxValue : index}) { 637 } 638 acc.kernels { 639 } attributes {defaultAttr = #acc<defaultvalue none>} 640 acc.kernels { 641 } attributes {defaultAttr = #acc<defaultvalue present>} 642 
acc.kernels { 643 } attributes {asyncAttr} 644 acc.kernels { 645 } attributes {waitAttr} 646 acc.kernels { 647 } attributes {selfAttr} 648 acc.kernels { 649 acc.terminator 650 } attributes {selfAttr} 651 return 652} 653 654// CHECK: func @testkernelsop([[ARGA:%.*]]: memref<10xf32>, [[ARGB:%.*]]: memref<10xf32>, [[ARGC:%.*]]: memref<10x10xf32>) { 655// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64 656// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32 657// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index 658// CHECK: acc.kernels async([[I64VALUE]] : i64) { 659// CHECK-NEXT: } 660// CHECK: acc.kernels async([[I32VALUE]] : i32) { 661// CHECK-NEXT: } 662// CHECK: acc.kernels async([[IDXVALUE]] : index) { 663// CHECK-NEXT: } 664// CHECK: acc.kernels wait({[[I64VALUE]] : i64}) { 665// CHECK-NEXT: } 666// CHECK: acc.kernels wait({[[I32VALUE]] : i32}) { 667// CHECK-NEXT: } 668// CHECK: acc.kernels wait({[[IDXVALUE]] : index}) { 669// CHECK-NEXT: } 670// CHECK: acc.kernels wait({[[I64VALUE]] : i64, [[I32VALUE]] : i32, [[IDXVALUE]] : index}) { 671// CHECK-NEXT: } 672// CHECK: acc.kernels { 673// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>} 674// CHECK: acc.kernels { 675// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue present>} 676// CHECK: acc.kernels { 677// CHECK-NEXT: } attributes {asyncAttr} 678// CHECK: acc.kernels { 679// CHECK-NEXT: } attributes {waitAttr} 680// CHECK: acc.kernels { 681// CHECK-NEXT: } attributes {selfAttr} 682// CHECK: acc.kernels { 683// CHECK: acc.terminator 684// CHECK-NEXT: } attributes {selfAttr} 685 686// ----- 687 688func.func @testdataop(%a: memref<f32>, %b: memref<f32>, %c: memref<f32>) -> () { 689 %ifCond = arith.constant true 690 691 %0 = acc.present varPtr(%a : memref<f32>) -> memref<f32> 692 acc.data if(%ifCond) dataOperands(%0 : memref<f32>) { 693 } 694 695 %1 = acc.present varPtr(%a : memref<f32>) -> memref<f32> 696 acc.data dataOperands(%1 : memref<f32>) if(%ifCond) { 697 } 698 699 %2 = acc.present 
varPtr(%a : memref<f32>) -> memref<f32> 700 %3 = acc.present varPtr(%b : memref<f32>) -> memref<f32> 701 %4 = acc.present varPtr(%c : memref<f32>) -> memref<f32> 702 acc.data dataOperands(%2, %3, %4 : memref<f32>, memref<f32>, memref<f32>) { 703 } 704 705 %5 = acc.copyin varPtr(%a : memref<f32>) -> memref<f32> 706 %6 = acc.copyin varPtr(%b : memref<f32>) -> memref<f32> 707 %7 = acc.copyin varPtr(%c : memref<f32>) -> memref<f32> 708 acc.data dataOperands(%5, %6, %7 : memref<f32>, memref<f32>, memref<f32>) { 709 } 710 711 %8 = acc.copyin varPtr(%a : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>} 712 %9 = acc.copyin varPtr(%b : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>} 713 %10 = acc.copyin varPtr(%c : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>} 714 acc.data dataOperands(%8, %9, %10 : memref<f32>, memref<f32>, memref<f32>) { 715 } 716 717 %11 = acc.create varPtr(%a : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>} 718 %12 = acc.create varPtr(%b : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>} 719 %13 = acc.create varPtr(%c : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>} 720 acc.data dataOperands(%11, %12, %13 : memref<f32>, memref<f32>, memref<f32>) { 721 } 722 acc.copyout accPtr(%11 : memref<f32>) to varPtr(%a : memref<f32>) 723 acc.copyout accPtr(%12 : memref<f32>) to varPtr(%b : memref<f32>) 724 acc.copyout accPtr(%13 : memref<f32>) to varPtr(%c : memref<f32>) 725 726 %14 = acc.create varPtr(%a : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout_zero>} 727 %15 = acc.create varPtr(%b : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout_zero>} 728 %16 = acc.create varPtr(%c : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout_zero>} 729 acc.data dataOperands(%14, %15, %16 : memref<f32>, memref<f32>, memref<f32>) { 730 } 731 
acc.copyout accPtr(%14 : memref<f32>) to varPtr(%a : memref<f32>) {dataClause = #acc<data_clause acc_copyout_zero>} 732 acc.copyout accPtr(%15 : memref<f32>) to varPtr(%b : memref<f32>) {dataClause = #acc<data_clause acc_copyout_zero>} 733 acc.copyout accPtr(%16 : memref<f32>) to varPtr(%c : memref<f32>) {dataClause = #acc<data_clause acc_copyout_zero>} 734 735 %17 = acc.create varPtr(%a : memref<f32>) -> memref<f32> 736 %18 = acc.create varPtr(%b : memref<f32>) -> memref<f32> 737 %19 = acc.create varPtr(%c : memref<f32>) -> memref<f32> 738 acc.data dataOperands(%17, %18, %19 : memref<f32>, memref<f32>, memref<f32>) { 739 } 740 acc.delete accPtr(%17 : memref<f32>) {dataClause = #acc<data_clause acc_create>} 741 acc.delete accPtr(%18 : memref<f32>) {dataClause = #acc<data_clause acc_create>} 742 acc.delete accPtr(%19 : memref<f32>) {dataClause = #acc<data_clause acc_create>} 743 744 %20 = acc.create varPtr(%a : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_create_zero>} 745 %21 = acc.create varPtr(%b : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_create_zero>} 746 %22 = acc.create varPtr(%c : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_create_zero>} 747 acc.data dataOperands(%20, %21, %22 : memref<f32>, memref<f32>, memref<f32>) { 748 } 749 acc.delete accPtr(%20 : memref<f32>) {dataClause = #acc<data_clause acc_create_zero>} 750 acc.delete accPtr(%21 : memref<f32>) {dataClause = #acc<data_clause acc_create_zero>} 751 acc.delete accPtr(%22 : memref<f32>) {dataClause = #acc<data_clause acc_create_zero>} 752 753 %23 = acc.nocreate varPtr(%a : memref<f32>) -> memref<f32> 754 %24 = acc.nocreate varPtr(%b : memref<f32>) -> memref<f32> 755 %25 = acc.nocreate varPtr(%c : memref<f32>) -> memref<f32> 756 acc.data dataOperands(%23, %24, %25 : memref<f32>, memref<f32>, memref<f32>) { 757 } 758 759 %26 = acc.deviceptr varPtr(%a : memref<f32>) -> memref<f32> 760 %27 = acc.deviceptr varPtr(%b : memref<f32>) -> memref<f32> 761 
%28 = acc.deviceptr varPtr(%c : memref<f32>) -> memref<f32> 762 acc.data dataOperands(%26, %27, %28 : memref<f32>, memref<f32>, memref<f32>) { 763 } 764 765 %29 = acc.attach varPtr(%a : memref<f32>) -> memref<f32> 766 %30 = acc.attach varPtr(%b : memref<f32>) -> memref<f32> 767 %31 = acc.attach varPtr(%c : memref<f32>) -> memref<f32> 768 acc.data dataOperands(%29, %30, %31 : memref<f32>, memref<f32>, memref<f32>) { 769 } 770 771 %32 = acc.copyin varPtr(%a : memref<f32>) -> memref<f32> 772 %33 = acc.create varPtr(%b : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>} 773 %34 = acc.present varPtr(%c : memref<f32>) -> memref<f32> 774 acc.data dataOperands(%32, %33, %34 : memref<f32>, memref<f32>, memref<f32>) { 775 } 776 acc.copyout accPtr(%33 : memref<f32>) to varPtr(%b : memref<f32>) 777 778 %35 = acc.present varPtr(%a : memref<f32>) -> memref<f32> 779 acc.data dataOperands(%35 : memref<f32>) { 780 } attributes { defaultAttr = #acc<defaultvalue none> } 781 782 783 %36 = acc.present varPtr(%a : memref<f32>) -> memref<f32> 784 acc.data dataOperands(%36 : memref<f32>) { 785 } attributes { defaultAttr = #acc<defaultvalue present> } 786 787 acc.data { 788 } attributes { defaultAttr = #acc<defaultvalue none> } 789 790 acc.data { 791 } attributes { defaultAttr = #acc<defaultvalue none>, async } 792 793 %a1 = arith.constant 1 : i64 794 acc.data async(%a1 : i64) { 795 } attributes { defaultAttr = #acc<defaultvalue none>, async } 796 797 acc.data { 798 } attributes { defaultAttr = #acc<defaultvalue none>, wait } 799 800 %w1 = arith.constant 1 : i64 801 acc.data wait({%w1 : i64}) { 802 } attributes { defaultAttr = #acc<defaultvalue none>, wait } 803 804 %wd1 = arith.constant 1 : i64 805 acc.data wait({devnum: %wd1 : i64, %w1 : i64}) { 806 } attributes { defaultAttr = #acc<defaultvalue none>, wait } 807 808 return 809} 810 811// CHECK: func @testdataop(%[[ARGA:.*]]: memref<f32>, %[[ARGB:.*]]: memref<f32>, %[[ARGC:.*]]: memref<f32>) { 812 813// CHECK: 
%[[IFCOND1:.*]] = arith.constant true 814// CHECK: %[[PRESENT_A:.*]] = acc.present varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 815// CHECK: acc.data if(%[[IFCOND1]]) dataOperands(%[[PRESENT_A]] : memref<f32>) { 816// CHECK-NEXT: } 817 818// CHECK: %[[PRESENT_A:.*]] = acc.present varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 819// CHECK: acc.data if(%[[IFCOND1]]) dataOperands(%[[PRESENT_A]] : memref<f32>) { 820// CHECK-NEXT: } 821 822// CHECK: %[[PRESENT_A:.*]] = acc.present varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 823// CHECK: %[[PRESENT_B:.*]] = acc.present varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> 824// CHECK: %[[PRESENT_C:.*]] = acc.present varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> 825// CHECK: acc.data dataOperands(%[[PRESENT_A]], %[[PRESENT_B]], %[[PRESENT_C]] : memref<f32>, memref<f32>, memref<f32>) { 826// CHECK-NEXT: } 827 828// CHECK: %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 829// CHECK: %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> 830// CHECK: %[[COPYIN_C:.*]] = acc.copyin varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> 831// CHECK: acc.data dataOperands(%[[COPYIN_A]], %[[COPYIN_B]], %[[COPYIN_C]] : memref<f32>, memref<f32>, memref<f32>) { 832// CHECK-NEXT: } 833 834// CHECK: %[[COPYIN_A:.*]] = acc.copyin varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>} 835// CHECK: %[[COPYIN_B:.*]] = acc.copyin varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>} 836// CHECK: %[[COPYIN_C:.*]] = acc.copyin varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>} 837// CHECK: acc.data dataOperands(%[[COPYIN_A]], %[[COPYIN_B]], %[[COPYIN_C]] : memref<f32>, memref<f32>, memref<f32>) { 838// CHECK-NEXT: } 839 840// CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>} 841// 
CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>} 842// CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>} 843// CHECK: acc.data dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : memref<f32>, memref<f32>, memref<f32>) { 844// CHECK-NEXT: } 845// CHECK: acc.copyout accPtr(%[[CREATE_A]] : memref<f32>) to varPtr(%[[ARGA]] : memref<f32>) 846// CHECK: acc.copyout accPtr(%[[CREATE_B]] : memref<f32>) to varPtr(%[[ARGB]] : memref<f32>) 847// CHECK: acc.copyout accPtr(%[[CREATE_C]] : memref<f32>) to varPtr(%[[ARGC]] : memref<f32>) 848 849// CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout_zero>} 850// CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout_zero>} 851// CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout_zero>} 852// CHECK: acc.data dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : memref<f32>, memref<f32>, memref<f32>) { 853// CHECK-NEXT: } 854// CHECK: acc.copyout accPtr(%[[CREATE_A]] : memref<f32>) to varPtr(%[[ARGA]] : memref<f32>) {dataClause = #acc<data_clause acc_copyout_zero>} 855// CHECK: acc.copyout accPtr(%[[CREATE_B]] : memref<f32>) to varPtr(%[[ARGB]] : memref<f32>) {dataClause = #acc<data_clause acc_copyout_zero>} 856// CHECK: acc.copyout accPtr(%[[CREATE_C]] : memref<f32>) to varPtr(%[[ARGC]] : memref<f32>) {dataClause = #acc<data_clause acc_copyout_zero>} 857 858// CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 859// CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> 860// CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> 861// CHECK: acc.data 
dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : memref<f32>, memref<f32>, memref<f32>) { 862// CHECK-NEXT: } 863 864// CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_create_zero>} 865// CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_create_zero>} 866// CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_create_zero>} 867// CHECK: acc.data dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : memref<f32>, memref<f32>, memref<f32>) { 868// CHECK-NEXT: } 869 870// CHECK: %[[NOCREATE_A:.*]] = acc.nocreate varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 871// CHECK: %[[NOCREATE_B:.*]] = acc.nocreate varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> 872// CHECK: %[[NOCREATE_C:.*]] = acc.nocreate varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> 873// CHECK: acc.data dataOperands(%[[NOCREATE_A]], %[[NOCREATE_B]], %[[NOCREATE_C]] : memref<f32>, memref<f32>, memref<f32>) { 874// CHECK-NEXT: } 875 876// CHECK: %[[DEVICEPTR_A:.*]] = acc.deviceptr varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 877// CHECK: %[[DEVICEPTR_B:.*]] = acc.deviceptr varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> 878// CHECK: %[[DEVICEPTR_C:.*]] = acc.deviceptr varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> 879// CHECK: acc.data dataOperands(%[[DEVICEPTR_A]], %[[DEVICEPTR_B]], %[[DEVICEPTR_C]] : memref<f32>, memref<f32>, memref<f32>) { 880// CHECK-NEXT: } 881 882// CHECK: %[[ATTACH_A:.*]] = acc.attach varPtr(%[[ARGA]] : memref<f32>) -> memref<f32> 883// CHECK: %[[ATTACH_B:.*]] = acc.attach varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> 884// CHECK: %[[ATTACH_C:.*]] = acc.attach varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> 885// CHECK: acc.data dataOperands(%[[ATTACH_A]], %[[ATTACH_B]], %[[ATTACH_C]] : memref<f32>, memref<f32>, memref<f32>) { 886// CHECK-NEXT: } 887 888 889// CHECK: 
%[[COPYIN_A:.*]] = acc.copyin varPtr(%[[ARGA]] : memref<f32>) -> memref<f32>
// CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>}
// CHECK: %[[PRESENT_C:.*]] = acc.present varPtr(%[[ARGC]] : memref<f32>) -> memref<f32>
// CHECK: acc.data dataOperands(%[[COPYIN_A]], %[[CREATE_B]], %[[PRESENT_C]] : memref<f32>, memref<f32>, memref<f32>) {
// CHECK-NEXT: }
// CHECK: acc.copyout accPtr(%[[CREATE_B]] : memref<f32>) to varPtr(%[[ARGB]] : memref<f32>)

// CHECK: %[[PRESENT_A:.*]] = acc.present varPtr(%[[ARGA]] : memref<f32>) -> memref<f32>
// CHECK: acc.data dataOperands(%[[PRESENT_A]] : memref<f32>) {
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}

// CHECK: %[[PRESENT_A:.*]] = acc.present varPtr(%[[ARGA]] : memref<f32>) -> memref<f32>
// CHECK: acc.data dataOperands(%[[PRESENT_A]] : memref<f32>) {
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue present>}

// CHECK: acc.data {
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>}

// CHECK: acc.data {
// CHECK-NEXT: } attributes {async, defaultAttr = #acc<defaultvalue none>}

// CHECK: acc.data async(%{{.*}} : i64) {
// CHECK-NEXT: } attributes {async, defaultAttr = #acc<defaultvalue none>}

// CHECK: acc.data {
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>, wait}

// CHECK: acc.data wait({%{{.*}} : i64}) {
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>, wait}

// CHECK: acc.data wait({devnum: %{{.*}} : i64, %{{.*}} : i64}) {
// CHECK-NEXT: } attributes {defaultAttr = #acc<defaultvalue none>, wait}

// -----

// Round-trip coverage for acc.update.
//
// The op is written with: an async operand of each integer type used here
// (i64, i32, index), a wait list that carries a devnum entry, an if
// condition, one and three data operands, the operand-less `async` and `wait`
// keywords, and the `ifPresent` attribute. The input mixes `%v: type` and
// `%v : type` spellings; the expectations below all use `value : type`.
func.func @testupdateop(%a: memref<f32>, %b: memref<f32>, %c: memref<f32>) -> () {
  %i64Value = arith.constant 1 : i64
  %i32Value = arith.constant 1 : i32
  %idxValue = arith.constant 1 : index
  %ifCond = arith.constant true
  // Results of acc.update_device are the data operands of every acc.update below.
  %0 = acc.update_device varPtr(%a : memref<f32>) -> memref<f32>
  %1 = acc.update_device varPtr(%b : memref<f32>) -> memref<f32>
  %2 = acc.update_device varPtr(%c : memref<f32>) -> memref<f32>

  // The i32 async form appears twice here and is matched twice below.
  acc.update async(%i64Value: i64) dataOperands(%0: memref<f32>)
  acc.update async(%i32Value: i32) dataOperands(%0: memref<f32>)
  acc.update async(%i32Value: i32) dataOperands(%0: memref<f32>)
  acc.update async(%idxValue: index) dataOperands(%0: memref<f32>)
  acc.update wait({devnum: %i64Value: i64, %i32Value : i32, %idxValue : index}) dataOperands(%0: memref<f32>)
  acc.update if(%ifCond) dataOperands(%0: memref<f32>)
  acc.update dataOperands(%0: memref<f32>)
  acc.update dataOperands(%0, %1, %2 : memref<f32>, memref<f32>, memref<f32>)
  acc.update async dataOperands(%0, %1, %2 : memref<f32>, memref<f32>, memref<f32>)
  acc.update wait dataOperands(%0, %1, %2 : memref<f32>, memref<f32>, memref<f32>)
  acc.update dataOperands(%0, %1, %2 : memref<f32>, memref<f32>, memref<f32>) attributes {ifPresent}
  return
}

// CHECK: func.func @testupdateop(%{{.*}}: memref<f32>, %{{.*}}: memref<f32>, %{{.*}}: memref<f32>)
// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64
// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32
// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index
// CHECK: [[IFCOND:%.*]] = arith.constant true
// CHECK: acc.update async([[I64VALUE]] : i64) dataOperands(%{{.*}} : memref<f32>)
// CHECK: acc.update async([[I32VALUE]] : i32) dataOperands(%{{.*}} : memref<f32>)
// CHECK: acc.update async([[I32VALUE]] : i32) dataOperands(%{{.*}} : memref<f32>)
// CHECK: acc.update async([[IDXVALUE]] : index) dataOperands(%{{.*}} : memref<f32>)
// CHECK: acc.update wait({devnum: [[I64VALUE]] : i64, [[I32VALUE]] : i32, [[IDXVALUE]] : index}) dataOperands(%{{.*}} : memref<f32>)
// CHECK: acc.update if([[IFCOND]]) dataOperands(%{{.*}} : memref<f32>)
// CHECK: acc.update dataOperands(%{{.*}} : memref<f32>)
// 
CHECK: acc.update dataOperands(%{{.*}}, %{{.*}}, %{{.*}} : memref<f32>, memref<f32>, memref<f32>)
// CHECK: acc.update async dataOperands(%{{.*}}, %{{.*}}, %{{.*}} : memref<f32>, memref<f32>, memref<f32>)
// CHECK: acc.update wait dataOperands(%{{.*}}, %{{.*}}, %{{.*}} : memref<f32>, memref<f32>, memref<f32>)
// CHECK: acc.update dataOperands(%{{.*}}, %{{.*}}, %{{.*}} : memref<f32>, memref<f32>, memref<f32>) attributes {ifPresent}

// -----

// Round-trip coverage for acc.wait, written as top-level operations.
//
// Forms covered: no operands, a wait list of one or two values, an async
// operand of each integer type (i64, i32, index), wait_devnum, the `async`
// attribute, and an if condition. The two three-clause forms differ only in
// the order of `async` and `wait_devnum` in the input; both are expected to
// print as `async(...)` followed by `wait_devnum(...)`.
%i64Value = arith.constant 1 : i64
%i32Value = arith.constant 1 : i32
%idxValue = arith.constant 1 : index
%ifCond = arith.constant true
acc.wait
acc.wait(%i64Value: i64)
acc.wait(%i32Value: i32)
acc.wait(%idxValue: index)
acc.wait(%i32Value, %idxValue : i32, index)
acc.wait async(%i64Value: i64)
acc.wait async(%i32Value: i32)
acc.wait async(%idxValue: index)
acc.wait(%i32Value: i32) async(%idxValue: index)
acc.wait(%i64Value: i64) wait_devnum(%i32Value: i32)
acc.wait attributes {async}
acc.wait(%i64Value: i64) async(%idxValue: index) wait_devnum(%i32Value: i32)
acc.wait(%i64Value: i64) wait_devnum(%i32Value: i32) async(%idxValue: index)
acc.wait if(%ifCond)

// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64
// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32
// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index
// CHECK: [[IFCOND:%.*]] = arith.constant true
// CHECK: acc.wait
// CHECK: acc.wait([[I64VALUE]] : i64)
// CHECK: acc.wait([[I32VALUE]] : i32)
// CHECK: acc.wait([[IDXVALUE]] : index)
// CHECK: acc.wait([[I32VALUE]], [[IDXVALUE]] : i32, index)
// CHECK: acc.wait async([[I64VALUE]] : i64)
// CHECK: acc.wait async([[I32VALUE]] : i32)
// CHECK: acc.wait async([[IDXVALUE]] : index)
// CHECK: acc.wait([[I32VALUE]] : i32) async([[IDXVALUE]] : index)
// CHECK: acc.wait([[I64VALUE]] : i64) wait_devnum([[I32VALUE]] : i32)
// CHECK: acc.wait attributes {async}
// CHECK: acc.wait([[I64VALUE]] : i64) 
async([[IDXVALUE]] : index) wait_devnum([[I32VALUE]] : i32)
// CHECK: acc.wait([[I64VALUE]] : i64) async([[IDXVALUE]] : index) wait_devnum([[I32VALUE]] : i32)
// CHECK: acc.wait if([[IFCOND]])

// -----

// Round-trip coverage for acc.init, written as top-level operations.
//
// Forms covered: bare, with the acc.device_types attribute (nvidia),
// device_num of i64 / i32 / index, an if condition, and if + device_num in
// both input orders (both are expected to print device_num first).
// %i32Value2 is not referenced by any acc.init below; its constant is still
// matched in the expectations.
%i64Value = arith.constant 1 : i64
%i32Value = arith.constant 1 : i32
%i32Value2 = arith.constant 2 : i32
%idxValue = arith.constant 1 : index
%ifCond = arith.constant true
acc.init
acc.init attributes {acc.device_types = [#acc.device_type<nvidia>]}
acc.init device_num(%i64Value : i64)
acc.init device_num(%i32Value : i32)
acc.init device_num(%idxValue : index)
acc.init if(%ifCond)
acc.init if(%ifCond) device_num(%idxValue : index)
acc.init device_num(%idxValue : index) if(%ifCond)

// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64
// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32
// CHECK: [[I32VALUE2:%.*]] = arith.constant 2 : i32
// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index
// CHECK: [[IFCOND:%.*]] = arith.constant true
// CHECK: acc.init
// CHECK: acc.init attributes {acc.device_types = [#acc.device_type<nvidia>]}
// CHECK: acc.init device_num([[I64VALUE]] : i64)
// CHECK: acc.init device_num([[I32VALUE]] : i32)
// CHECK: acc.init device_num([[IDXVALUE]] : index)
// CHECK: acc.init if([[IFCOND]])
// CHECK: acc.init device_num([[IDXVALUE]] : index) if([[IFCOND]])
// CHECK: acc.init device_num([[IDXVALUE]] : index) if([[IFCOND]])

// -----

// Round-trip coverage for acc.shutdown, mirroring the acc.init case above:
// bare, with the acc.device_types attribute (default), device_num of
// i64 / i32 / index, an if condition, and if + device_num in both input
// orders (both are expected to print device_num first). %i32Value2 is again
// defined and matched but not used by any acc.shutdown.
%i64Value = arith.constant 1 : i64
%i32Value = arith.constant 1 : i32
%i32Value2 = arith.constant 2 : i32
%idxValue = arith.constant 1 : index
%ifCond = arith.constant true
acc.shutdown
acc.shutdown attributes {acc.device_types = [#acc.device_type<default>]}
acc.shutdown device_num(%i64Value : i64)
acc.shutdown device_num(%i32Value : i32)
acc.shutdown device_num(%idxValue : index)
acc.shutdown if(%ifCond)
acc.shutdown if(%ifCond) device_num(%idxValue : index)
acc.shutdown device_num(%idxValue : index) if(%ifCond)

// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64
// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32
// CHECK: [[I32VALUE2:%.*]] = arith.constant 2 : i32
// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index
// CHECK: [[IFCOND:%.*]] = arith.constant true
// CHECK: acc.shutdown
// CHECK: acc.shutdown attributes {acc.device_types = [#acc.device_type<default>]}
// CHECK: acc.shutdown device_num([[I64VALUE]] : i64)
// CHECK: acc.shutdown device_num([[I32VALUE]] : i32)
// CHECK: acc.shutdown device_num([[IDXVALUE]] : index)
// CHECK: acc.shutdown if([[IFCOND]])
// CHECK: acc.shutdown device_num([[IDXVALUE]] : index) if([[IFCOND]])
// CHECK: acc.shutdown device_num([[IDXVALUE]] : index) if([[IFCOND]])

// -----

// Round-trip coverage for acc.exit_data on an !llvm.ptr argument.
//
// Every case has the same three-op shape: acc.getdeviceptr produces the data
// operand, acc.exit_data consumes it, and an exit clause op (acc.copyout,
// acc.delete or acc.detach) follows. The cases vary only the acc.exit_data
// clauses and attributes.
func.func @testexitdataop(%a: !llvm.ptr) -> () {
  %ifCond = arith.constant true
  %i64Value = arith.constant 1 : i64
  %i32Value = arith.constant 1 : i32
  %idxValue = arith.constant 1 : index

  // Plain exit_data, followed by copyout.
  %0 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data dataOperands(%0 : !llvm.ptr)
  acc.copyout accPtr(%0 : !llvm.ptr) to varPtr(%a : !llvm.ptr) varType(f64)

  // Plain exit_data, followed by delete.
  %1 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data dataOperands(%1 : !llvm.ptr)
  acc.delete accPtr(%1 : !llvm.ptr)

  // Written without a space after the comma; expected to print as {async, finalize}.
  %2 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data dataOperands(%2 : !llvm.ptr) attributes {async,finalize}
  acc.delete accPtr(%2 : !llvm.ptr)

  // Plain exit_data, followed by detach.
  %3 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data dataOperands(%3 : !llvm.ptr)
  acc.detach accPtr(%3 : !llvm.ptr)

  // `async` given as an attribute.
  %4 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data dataOperands(%4 : !llvm.ptr) attributes {async}
  acc.copyout accPtr(%4 : !llvm.ptr) to varPtr(%a : !llvm.ptr) varType(f64)

  // `wait` given as an attribute.
  %5 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data dataOperands(%5 : !llvm.ptr) attributes {wait}
  acc.delete accPtr(%5 : !llvm.ptr)

  // async operand written before dataOperands.
  %6 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data async(%i64Value : i64) dataOperands(%6 : !llvm.ptr)
  acc.copyout accPtr(%6 : !llvm.ptr) to varPtr(%a : !llvm.ptr) varType(f64)

  // async operand written after dataOperands; expected to print async first.
  %7 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data dataOperands(%7 : !llvm.ptr) async(%i64Value : i64)
  acc.copyout accPtr(%7 : !llvm.ptr) to varPtr(%a : !llvm.ptr) varType(f64)

  // if condition.
  %8 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data if(%ifCond) dataOperands(%8 : !llvm.ptr)
  acc.copyout accPtr(%8 : !llvm.ptr) to varPtr(%a : !llvm.ptr) varType(f64)

  // wait_devnum together with a two-value wait list.
  %9 = acc.getdeviceptr varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.exit_data wait_devnum(%i64Value: i64) wait(%i32Value, %idxValue : i32, index) dataOperands(%9 : !llvm.ptr)
  acc.copyout accPtr(%9 : !llvm.ptr) to varPtr(%a : !llvm.ptr) varType(f64)

  return
}

// CHECK: func @testexitdataop(%[[ARGA:.*]]: !llvm.ptr) {
// CHECK: %[[IFCOND:.*]] = arith.constant true
// CHECK: %[[I64VALUE:.*]] = arith.constant 1 : i64
// CHECK: %[[I32VALUE:.*]] = arith.constant 1 : i32
// CHECK: %[[IDXVALUE:.*]] = arith.constant 1 : index

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !llvm.ptr)
// CHECK: acc.copyout accPtr(%[[DEVPTR]] : !llvm.ptr) to varPtr(%[[ARGA]] : !llvm.ptr) varType(f64)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !llvm.ptr)
// CHECK: acc.delete accPtr(%[[DEVPTR]] : !llvm.ptr)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !llvm.ptr) attributes {async, finalize}
// CHECK: acc.delete accPtr(%[[DEVPTR]] : !llvm.ptr)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !llvm.ptr)
// CHECK: acc.detach accPtr(%[[DEVPTR]] : !llvm.ptr)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !llvm.ptr) attributes {async}
// CHECK: acc.copyout accPtr(%[[DEVPTR]] : !llvm.ptr) to varPtr(%[[ARGA]] : !llvm.ptr) varType(f64)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data dataOperands(%[[DEVPTR]] : !llvm.ptr) attributes {wait}
// CHECK: acc.delete accPtr(%[[DEVPTR]] : !llvm.ptr)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data async(%[[I64VALUE]] : i64) dataOperands(%[[DEVPTR]] : !llvm.ptr)
// CHECK: acc.copyout accPtr(%[[DEVPTR]] : !llvm.ptr) to varPtr(%[[ARGA]] : !llvm.ptr) varType(f64)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data async(%[[I64VALUE]] : i64) dataOperands(%[[DEVPTR]] : !llvm.ptr)
// CHECK: acc.copyout accPtr(%[[DEVPTR]] : !llvm.ptr) to varPtr(%[[ARGA]] : !llvm.ptr) varType(f64)

// CHECK: %[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data if(%[[IFCOND]]) dataOperands(%[[DEVPTR]] : !llvm.ptr)
// CHECK: acc.copyout accPtr(%[[DEVPTR]] : !llvm.ptr) to varPtr(%[[ARGA]] : !llvm.ptr) varType(f64)

// CHECK: 
%[[DEVPTR:.*]] = acc.getdeviceptr varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.exit_data wait_devnum(%[[I64VALUE]] : i64) wait(%[[I32VALUE]], %[[IDXVALUE]] : i32, index) dataOperands(%[[DEVPTR]] : !llvm.ptr)
// CHECK: acc.copyout accPtr(%[[DEVPTR]] : !llvm.ptr) to varPtr(%[[ARGA]] : !llvm.ptr) varType(f64)

// -----


// Round-trip coverage for acc.enter_data on !llvm.ptr arguments.
//
// Each acc.enter_data consumes the result of an entry clause op defined just
// before it (acc.copyin, acc.create or acc.attach). Cases: a single operand,
// three operands (two of them acc.create with the acc_create_zero data
// clause), the `async` and `wait` attributes, an async operand written before
// and after dataOperands, an if condition, and wait_devnum with a two-value
// wait list.
func.func @testenterdataop(%a: !llvm.ptr, %b: !llvm.ptr, %c: !llvm.ptr) -> () {
  %ifCond = arith.constant true
  %i64Value = arith.constant 1 : i64
  %i32Value = arith.constant 1 : i32
  %idxValue = arith.constant 1 : index

  %0 = acc.copyin varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data dataOperands(%0 : !llvm.ptr)
  %1 = acc.create varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  %2 = acc.create varPtr(%b : !llvm.ptr) varType(f64) -> !llvm.ptr {dataClause = #acc<data_clause acc_create_zero>}
  %3 = acc.create varPtr(%c : !llvm.ptr) varType(f64) -> !llvm.ptr {dataClause = #acc<data_clause acc_create_zero>}
  acc.enter_data dataOperands(%1, %2, %3 : !llvm.ptr, !llvm.ptr, !llvm.ptr)
  %4 = acc.attach varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data dataOperands(%4 : !llvm.ptr)
  %5 = acc.copyin varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data dataOperands(%5 : !llvm.ptr) attributes {async}
  %6 = acc.create varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data dataOperands(%6 : !llvm.ptr) attributes {wait}
  %7 = acc.copyin varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data async(%i64Value : i64) dataOperands(%7 : !llvm.ptr)
  // Same clauses as the previous case with dataOperands written first.
  %8 = acc.copyin varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data dataOperands(%8 : !llvm.ptr) async(%i64Value : i64)
  %9 = acc.copyin varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data if(%ifCond) dataOperands(%9 : !llvm.ptr)
  %10 = acc.copyin varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.enter_data wait_devnum(%i64Value: i64) wait(%i32Value, %idxValue : i32, index) dataOperands(%10 : !llvm.ptr)

  return
}

// CHECK: func @testenterdataop(%[[ARGA:.*]]: !llvm.ptr, %[[ARGB:.*]]: !llvm.ptr, %[[ARGC:.*]]: !llvm.ptr) {
// CHECK: [[IFCOND:%.*]] = arith.constant true
// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64
// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32
// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data dataOperands(%[[COPYIN]] : !llvm.ptr)
// CHECK: %[[CREATE_A:.*]] = acc.create varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: %[[CREATE_B:.*]] = acc.create varPtr(%[[ARGB]] : !llvm.ptr) varType(f64) -> !llvm.ptr {dataClause = #acc<data_clause acc_create_zero>}
// CHECK: %[[CREATE_C:.*]] = acc.create varPtr(%[[ARGC]] : !llvm.ptr) varType(f64) -> !llvm.ptr {dataClause = #acc<data_clause acc_create_zero>}
// CHECK: acc.enter_data dataOperands(%[[CREATE_A]], %[[CREATE_B]], %[[CREATE_C]] : !llvm.ptr, !llvm.ptr, !llvm.ptr)
// CHECK: %[[ATTACH:.*]] = acc.attach varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data dataOperands(%[[ATTACH]] : !llvm.ptr)
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data dataOperands(%[[COPYIN]] : !llvm.ptr) attributes {async}
// CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data dataOperands(%[[CREATE]] : !llvm.ptr) attributes {wait}
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data async([[I64VALUE]] : i64) dataOperands(%[[COPYIN]] : !llvm.ptr)
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data async([[I64VALUE]] : i64) dataOperands(%[[COPYIN]] : !llvm.ptr)
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data if([[IFCOND]]) dataOperands(%[[COPYIN]] : !llvm.ptr)
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[ARGA]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.enter_data wait_devnum([[I64VALUE]] : i64) wait([[I32VALUE]], [[IDXVALUE]] : i32, index) dataOperands(%[[COPYIN]] : !llvm.ptr)

// -----

// Round-trip coverage for data clause ops that feed structured constructs.
//
// Entry ops (acc.deviceptr, acc.present, acc.copyin, acc.create, acc.attach)
// produce the data operands of acc.parallel / acc.data / acc.kernels /
// acc.serial regions with empty bodies; where the input has one, the matching
// exit op (acc.copyout, acc.delete, acc.detach) follows the region. The cases
// also cover the `name`, `dataClause` and `implicit` attributes, a varPtrPtr
// operand, and acc.bounds operands (including extent, startIdx and the
// strideInBytes attribute).
func.func @teststructureddataclauseops(%a: memref<10xf32>, %b: memref<memref<10xf32>>, %c: memref<10x20xf32>) -> () {
  %deviceptr = acc.deviceptr varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {name = "arrayA"}
  acc.parallel dataOperands(%deviceptr : memref<10xf32>) {
  }

  %present = acc.present varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
  acc.data dataOperands(%present : memref<10xf32>) {
  }

  %copyin = acc.copyin varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
  acc.parallel dataOperands(%copyin : memref<10xf32>) {
  }

  %copyinreadonly = acc.copyin varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copyin_readonly>}
  acc.kernels dataOperands(%copyinreadonly : memref<10xf32>) {
  }

  // copyin/copyout pair, both tagged with the acc_copy data clause.
  %copyinfromcopy = acc.copyin varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>}
  acc.serial dataOperands(%copyinfromcopy : memref<10xf32>) {
  }
  acc.copyout accPtr(%copyinfromcopy : memref<10xf32>) to varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copy>}

  // Two creates, the second marked implicit; each has a matching delete.
  %create = acc.create varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
  %createimplicit = acc.create varPtr(%c : memref<10x20xf32>) varType(tensor<10x20xf32>) -> memref<10x20xf32> {implicit = true}
  acc.parallel dataOperands(%create, %createimplicit : memref<10xf32>, memref<10x20xf32>) {
  }
  acc.delete accPtr(%create : memref<10xf32>) {dataClause = #acc<data_clause acc_create>}
  acc.delete accPtr(%createimplicit : memref<10x20xf32>) {dataClause = #acc<data_clause acc_create>, implicit = true}

  %copyoutzero = acc.create varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copyout_zero>}
  acc.parallel dataOperands(%copyoutzero: memref<10xf32>) {
  }
  acc.copyout accPtr(%copyoutzero : memref<10xf32>) to varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copyout_zero>}

  %attach = acc.attach varPtr(%b : memref<memref<10xf32>>) -> memref<memref<10xf32>>
  acc.parallel dataOperands(%attach : memref<memref<10xf32>>) {
  }
  acc.detach accPtr(%attach : memref<memref<10xf32>>) {dataClause = #acc<data_clause acc_attach>}

  // copyin that also passes %b through varPtrPtr.
  %copyinparent = acc.copyin varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) varPtrPtr(%b : memref<memref<10xf32>>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>}
  acc.parallel dataOperands(%copyinparent : memref<10xf32>) {
  }
  acc.copyout accPtr(%copyinparent : memref<10xf32>) to varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copy>}

  // Constants for the acc.bounds ops below. %c10 is not referenced by any op
  // in this function and is not matched by the expectations.
  %c0 = arith.constant 0 : index
  %c1 = arith.constant 1 : index
  %c4 = arith.constant 4 : index
  %c9 = arith.constant 9 : index
  %c10 = arith.constant 10 : index
  %c20 = arith.constant 20 : index

  %bounds1full = acc.bounds lowerbound(%c0 : index) upperbound(%c9 : index) stride(%c1 : index)
  %copyinfullslice1 = acc.copyin varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) bounds(%bounds1full) -> memref<10xf32> {name = "arrayA[0:9]"}
  // Specify full-bounds but assume that startIdx of array reference is 1.
  %bounds2full = acc.bounds lowerbound(%c1 : index) upperbound(%c20 : index) extent(%c20 : index) stride(%c4 : index) startIdx(%c1 : index) {strideInBytes = true}
  %copyinfullslice2 = acc.copyin varPtr(%c : memref<10x20xf32>) varType(tensor<10x20xf32>) bounds(%bounds1full, %bounds2full) -> memref<10x20xf32>
  acc.parallel dataOperands(%copyinfullslice1, %copyinfullslice2 : memref<10xf32>, memref<10x20xf32>) {
  }

  // The same bounds value is passed to both the copyin and its copyout.
  %bounds1partial = acc.bounds lowerbound(%c4 : index) upperbound(%c9 : index) stride(%c1 : index)
  %copyinpartial = acc.copyin varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) bounds(%bounds1partial) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>}
  acc.parallel dataOperands(%copyinpartial : memref<10xf32>) {
  }
  acc.copyout accPtr(%copyinpartial : memref<10xf32>) bounds(%bounds1partial) to varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copy>}

  return
}

// CHECK: func.func @teststructureddataclauseops([[ARGA:%.*]]: memref<10xf32>, [[ARGB:%.*]]: memref<memref<10xf32>>, [[ARGC:%.*]]: memref<10x20xf32>) {
// CHECK: [[DEVICEPTR:%.*]] = acc.deviceptr varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {name = "arrayA"}
// CHECK-NEXT: acc.parallel dataOperands([[DEVICEPTR]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK: [[PRESENT:%.*]] = acc.present varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
// CHECK-NEXT: acc.data dataOperands([[PRESENT]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK: [[COPYIN:%.*]] = acc.copyin varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
// CHECK-NEXT: acc.parallel dataOperands([[COPYIN]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK: [[COPYINRO:%.*]] = acc.copyin varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copyin_readonly>}
// CHECK-NEXT: acc.kernels dataOperands([[COPYINRO]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK: [[COPYINCOPY:%.*]] = acc.copyin varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>}
// CHECK-NEXT: acc.serial dataOperands([[COPYINCOPY]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK-NEXT: acc.copyout accPtr([[COPYINCOPY]] : memref<10xf32>) to varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copy>}
// CHECK: [[CREATE:%.*]] = acc.create varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
// CHECK-NEXT: [[CREATEIMP:%.*]] = acc.create varPtr([[ARGC]] : memref<10x20xf32>) varType(tensor<10x20xf32>) -> memref<10x20xf32> {implicit = true}
// CHECK-NEXT: acc.parallel dataOperands([[CREATE]], [[CREATEIMP]] : memref<10xf32>, memref<10x20xf32>) {
// CHECK-NEXT: }
// CHECK-NEXT: acc.delete accPtr([[CREATE]] : memref<10xf32>) {dataClause = #acc<data_clause acc_create>}
// CHECK-NEXT: acc.delete accPtr([[CREATEIMP]] : memref<10x20xf32>) {dataClause = #acc<data_clause acc_create>, implicit = true}
// CHECK: [[COPYOUTZ:%.*]] = acc.create varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copyout_zero>}
// CHECK-NEXT: acc.parallel dataOperands([[COPYOUTZ]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK-NEXT: acc.copyout accPtr([[COPYOUTZ]] : memref<10xf32>) to varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copyout_zero>}
// CHECK: [[ATTACH:%.*]] = acc.attach varPtr([[ARGB]] : memref<memref<10xf32>>) -> memref<memref<10xf32>>
// CHECK-NEXT: acc.parallel dataOperands([[ATTACH]] : memref<memref<10xf32>>) {
// CHECK-NEXT: }
// CHECK-NEXT: acc.detach accPtr([[ATTACH]] : memref<memref<10xf32>>) {dataClause = #acc<data_clause acc_attach>}
// CHECK: [[COPYINP:%.*]] = acc.copyin varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) varPtrPtr([[ARGB]] : memref<memref<10xf32>>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>}
// CHECK-NEXT: acc.parallel dataOperands([[COPYINP]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK-NEXT: acc.copyout accPtr([[COPYINP]] : memref<10xf32>) to varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copy>}
// CHECK-DAG: [[CON0:%.*]] = arith.constant 0 : index
// CHECK-DAG: [[CON1:%.*]] = arith.constant 1 : index
// CHECK-DAG: [[CON4:%.*]] = arith.constant 4 : index
// CHECK-DAG: [[CON9:%.*]] = arith.constant 9 : index
// CHECK-DAG: [[CON20:%.*]] = arith.constant 20 : index
// CHECK: [[BOUNDS1F:%.*]] = acc.bounds lowerbound([[CON0]] : index) upperbound([[CON9]] : index) stride([[CON1]] : index)
// CHECK-NEXT: [[COPYINF1:%.*]] = acc.copyin varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) bounds([[BOUNDS1F]]) -> memref<10xf32> {name = "arrayA[0:9]"}
// CHECK-NEXT: [[BOUNDS2F:%.*]] = acc.bounds lowerbound([[CON1]] : index) upperbound([[CON20]] : index) extent([[CON20]] : index) stride([[CON4]] : index) startIdx([[CON1]] : index) {strideInBytes = true}
// CHECK-NEXT: [[COPYINF2:%.*]] = acc.copyin varPtr([[ARGC]] : memref<10x20xf32>) varType(tensor<10x20xf32>) bounds([[BOUNDS1F]], [[BOUNDS2F]]) -> memref<10x20xf32>
// CHECK-NEXT: acc.parallel dataOperands([[COPYINF1]], [[COPYINF2]] : memref<10xf32>, memref<10x20xf32>) {
// CHECK-NEXT: }
// CHECK: [[BOUNDS1P:%.*]] = acc.bounds lowerbound([[CON4]] : index) upperbound([[CON9]] : index) stride([[CON1]] : index)
// CHECK-NEXT: [[COPYINPART:%.*]] = acc.copyin varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) bounds([[BOUNDS1P]]) -> memref<10xf32> {dataClause = #acc<data_clause acc_copy>}
// CHECK-NEXT: acc.parallel dataOperands([[COPYINPART]] : memref<10xf32>) {
// CHECK-NEXT: }
// CHECK-NEXT: acc.copyout 
accPtr([[COPYINPART]] : memref<10xf32>) bounds([[BOUNDS1P]]) to varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) {dataClause = #acc<data_clause acc_copy>}

// -----

// Round-trips data clause ops marked `structured = false` together with the
// acc.enter_data / acc.exit_data ops that consume them.
func.func @testunstructuredclauseops(%a: memref<10xf32>) -> () {
  %copyin = acc.copyin varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {structured = false}
  acc.enter_data dataOperands(%copyin : memref<10xf32>)

  %devptr = acc.getdeviceptr varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copyout>}
  acc.exit_data dataOperands(%devptr : memref<10xf32>)
  acc.copyout accPtr(%devptr : memref<10xf32>) to varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) {structured = false}

  return
}

// CHECK: func.func @testunstructuredclauseops([[ARGA:%.*]]: memref<10xf32>) {
// CHECK: [[COPYIN:%.*]] = acc.copyin varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {structured = false}
// CHECK-NEXT: acc.enter_data dataOperands([[COPYIN]] : memref<10xf32>)
// CHECK: [[DEVPTR:%.*]] = acc.getdeviceptr varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32> {dataClause = #acc<data_clause acc_copyout>}
// CHECK-NEXT: acc.exit_data dataOperands([[DEVPTR]] : memref<10xf32>)
// CHECK-NEXT: acc.copyout accPtr([[DEVPTR]] : memref<10xf32>) to varPtr([[ARGA]] : memref<10xf32>) varType(tensor<10xf32>) {structured = false}

// -----

// Round-trips acc.update_host and acc.update_device feeding acc.update.
func.func @host_device_ops(%a: memref<f32>) -> () {
  %devptr = acc.getdeviceptr varPtr(%a : memref<f32>) -> memref<f32>
  acc.update_host accPtr(%devptr : memref<f32>) to varPtr(%a : memref<f32>) {structured = false}
  acc.update dataOperands(%devptr : memref<f32>)

  %accPtr = acc.update_device varPtr(%a : memref<f32>) -> memref<f32>
  acc.update dataOperands(%accPtr : memref<f32>)
  return
}

// The update_device result gets its own capture name so that it is not
// confused with the getdeviceptr result captured above.
// CHECK-LABEL: func.func @host_device_ops(
// CHECK-SAME: %[[A:.*]]: memref<f32>)
// CHECK: %[[DEVPTR_A:.*]] = acc.getdeviceptr varPtr(%[[A]] : memref<f32>) -> memref<f32>
// CHECK: acc.update_host accPtr(%[[DEVPTR_A]] : memref<f32>) to varPtr(%[[A]] : memref<f32>) {structured = false}
// CHECK: acc.update dataOperands(%[[DEVPTR_A]] : memref<f32>)
// CHECK: %[[ACCPTR_A:.*]] = acc.update_device varPtr(%[[A]] : memref<f32>) -> memref<f32>
// CHECK: acc.update dataOperands(%[[ACCPTR_A]] : memref<f32>)

// -----

// Round-trips acc.host_data in three forms: plain, with the `if_present`
// attribute, and with an `if` condition operand.
func.func @host_data_ops(%a: !llvm.ptr, %ifCond: i1) -> () {
  %0 = acc.use_device varPtr(%a : !llvm.ptr) varType(f64) -> !llvm.ptr
  acc.host_data dataOperands(%0: !llvm.ptr) {
  }
  acc.host_data dataOperands(%0: !llvm.ptr) {
  } attributes {if_present}
  acc.host_data if(%ifCond) dataOperands(%0: !llvm.ptr) {
  }
  return
}

// CHECK-LABEL: func.func @host_data_ops(
// CHECK-SAME: %[[A:.*]]: !llvm.ptr, %[[IFCOND:.*]]: i1)
// CHECK: %[[PTR:.*]] = acc.use_device varPtr(%[[A]] : !llvm.ptr) varType(f64) -> !llvm.ptr
// CHECK: acc.host_data dataOperands(%[[PTR]] : !llvm.ptr)
// CHECK: acc.host_data dataOperands(%[[PTR]] : !llvm.ptr) {
// CHECK: } attributes {if_present}
// CHECK: acc.host_data if(%[[IFCOND]]) dataOperands(%[[PTR]] : !llvm.ptr)

// -----

// Round-trips a private recipe that only has an `init` region; the region
// allocates an i32, stores 0 into it and yields the pointer.
acc.private.recipe @privatization_i32 : !llvm.ptr init {
^bb0(%arg0: !llvm.ptr):
  %c1 = arith.constant 1 : i32
  %c0 = arith.constant 0 : i32
  %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr
  llvm.store %c0, %0 : i32, !llvm.ptr
  acc.yield %0 : !llvm.ptr
}

// CHECK: acc.private.recipe @privatization_i32 : !llvm.ptr init {
// CHECK: %[[C1:.*]] = arith.constant 1 : i32
// CHECK: %[[C0:.*]] = arith.constant 0 : i32
// CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[C1]] x i32 : (i32) -> !llvm.ptr
// CHECK: llvm.store %[[C0]], %[[ALLOCA]] : i32, !llvm.ptr
// CHECK: acc.yield %[[ALLOCA]] : !llvm.ptr

// -----

func.func private @destroy_struct(!llvm.struct<(i32, i32)>) -> ()

// Round-trips a private recipe that has both an `init` and a `destroy` region.
// NOTE(review): the symbol name ends in `_i32_i64`, but the type used
// throughout is !llvm.struct<(i32, i32)>; confirm which one is intended.
acc.private.recipe @privatization_struct_i32_i64 : !llvm.struct<(i32, i32)> init {
^bb0(%arg0 : !llvm.struct<(i32, i32)>):
  %c1 = arith.constant 1 : i32
  %0 = llvm.mlir.undef : !llvm.struct<(i32, i32)>
  %1 = llvm.insertvalue %c1, %0[0] : !llvm.struct<(i32, i32)>
  %2 = llvm.insertvalue %c1, %1[1] : !llvm.struct<(i32, i32)>
  acc.yield %2 : !llvm.struct<(i32, i32)>
} destroy {
^bb0(%arg0: !llvm.struct<(i32, i32)>):
  func.call @destroy_struct(%arg0) : (!llvm.struct<(i32, i32)>) -> ()
  acc.terminator
}

// CHECK: func.func private @destroy_struct(!llvm.struct<(i32, i32)>)

// CHECK: acc.private.recipe @privatization_struct_i32_i64 : !llvm.struct<(i32, i32)> init {
// CHECK: %[[C1:.*]] = arith.constant 1 : i32
// CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : !llvm.struct<(i32, i32)>
// CHECK: %[[UNDEF1:.*]] = llvm.insertvalue %[[C1]], %[[UNDEF]][0] : !llvm.struct<(i32, i32)>
// CHECK: %[[UNDEF2:.*]] = llvm.insertvalue %[[C1]], %[[UNDEF1]][1] : !llvm.struct<(i32, i32)>
// CHECK: acc.yield %[[UNDEF2]] : !llvm.struct<(i32, i32)>
// CHECK: } destroy {
// CHECK: ^bb0(%[[ARG0:.*]]: !llvm.struct<(i32, i32)>):
// CHECK: func.call @destroy_struct(%[[ARG0]]) : (!llvm.struct<(i32, i32)>) -> ()
// CHECK: acc.terminator

// -----

// Round-trips an `add` reduction recipe (init + combiner regions) and its use
// in the reduction clause of acc.parallel and acc.loop.
acc.reduction.recipe @reduction_add_i64 : i64 reduction_operator<add> init {
^bb0(%arg0: i64):
  %0 = arith.constant 0 : i64
  acc.yield %0 : i64
} combiner {
^bb0(%arg0: i64, %arg1: i64):
  %0 = arith.addi %arg0, %arg1 : i64
  acc.yield %0 : i64
}

// CHECK-LABEL: acc.reduction.recipe @reduction_add_i64 : i64 reduction_operator <add> init {
// CHECK: ^bb0(%{{.*}}: i64):
// CHECK: %[[C0:.*]] = arith.constant 0 : i64
// CHECK: acc.yield %[[C0]] : i64
// CHECK: } combiner {
// CHECK: ^bb0(%[[ARG0:.*]]: i64, %[[ARG1:.*]]: i64):
// CHECK: %[[RES:.*]] = arith.addi %[[ARG0]], %[[ARG1]] : i64
// CHECK: acc.yield %[[RES]] : i64
// CHECK: }

func.func @acc_reduc_test(%a : i64) -> () {
  %c0 = arith.constant 0 : index
  %c10 = arith.constant 10 : index
  %c1 = arith.constant 1 : index
  acc.parallel reduction(@reduction_add_i64 -> %a : i64) {
    acc.loop reduction(@reduction_add_i64 -> %a : i64) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
      acc.yield
    } attributes { inclusiveUpperbound = array<i1: true> }
    acc.yield
  }
  return
}

// CHECK-LABEL: func.func @acc_reduc_test(
// CHECK-SAME: %[[ARG0:.*]]: i64)
// CHECK: acc.parallel reduction(@reduction_add_i64 -> %[[ARG0]] : i64)
// CHECK: acc.loop reduction(@reduction_add_i64 -> %[[ARG0]] : i64)

// -----

// Same recipe as in the previous split, written with numeric SSA names
// (%0, %1, %2), and used in the reduction clause of acc.serial.
acc.reduction.recipe @reduction_add_i64 : i64 reduction_operator<add> init {
^bb0(%0: i64):
  %1 = arith.constant 0 : i64
  acc.yield %1 : i64
} combiner {
^bb0(%0: i64, %1: i64):
  %2 = arith.addi %0, %1 : i64
  acc.yield %2 : i64
}

func.func @acc_reduc_test(%a : i64) -> () {
  acc.serial reduction(@reduction_add_i64 -> %a : i64) {
  }
  return
}

// CHECK-LABEL: func.func @acc_reduc_test(
// CHECK-SAME: %[[ARG0:.*]]: i64)
// CHECK: acc.serial reduction(@reduction_add_i64 -> %[[ARG0]] : i64)

// -----

// Round-trips acc.declare_enter / acc.declare_exit with the data clause ops
// that feed them and the matching exit-side acc.delete / acc.copyout ops.
func.func @testdeclareop(%a: memref<f32>, %b: memref<f32>, %c: memref<f32>) -> () {
  %0 = acc.copyin varPtr(%a : memref<f32>) -> memref<f32>
  // copyin(readonly)
  %1 = acc.copyin varPtr(%b : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>}
  // copy
  %2 = acc.copyin varPtr(%c : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copy>}
  acc.declare_enter dataOperands(%0, %1, %2 : memref<f32>, memref<f32>, memref<f32>)

  %3 = acc.create varPtr(%a : memref<f32>) -> memref<f32>
  // copyout
  %4 = acc.create varPtr(%b : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>}
  %5 = acc.present varPtr(%c : memref<f32>) -> memref<f32>
  acc.declare_enter dataOperands(%3, %4, %5 : memref<f32>, memref<f32>, memref<f32>)

  %6 = acc.deviceptr varPtr(%a : memref<f32>) -> memref<f32>
  %7 = acc.declare_device_resident varPtr(%b : memref<f32>) -> memref<f32>
  %8 = acc.declare_link varPtr(%c : memref<f32>) -> memref<f32>
  acc.declare_enter dataOperands(%6, %7, %8 : memref<f32>, memref<f32>, memref<f32>)

  acc.declare_exit dataOperands(%7, %8 : memref<f32>, memref<f32>)
  acc.delete accPtr(%7 : memref<f32>) {dataClause = #acc<data_clause acc_declare_device_resident> }
  acc.delete accPtr(%8 : memref<f32>) {dataClause = #acc<data_clause acc_declare_link> }

  acc.declare_exit dataOperands(%3, %4, %5 : memref<f32>, memref<f32>, memref<f32>)
  acc.delete accPtr(%3 : memref<f32>) {dataClause = #acc<data_clause acc_create> }
  acc.copyout accPtr(%4 : memref<f32>) to varPtr(%b : memref<f32>)
  acc.delete accPtr(%5 : memref<f32>) {dataClause = #acc<data_clause acc_present> }

  acc.declare_exit dataOperands(%0, %1, %2 : memref<f32>, memref<f32>, memref<f32>)
  acc.delete accPtr(%0 : memref<f32>) {dataClause = #acc<data_clause acc_copyin> }
  acc.delete accPtr(%1 : memref<f32>) {dataClause = #acc<data_clause acc_copyin_readonly> }
  acc.copyout accPtr(%2 : memref<f32>) to varPtr(%c : memref<f32>) { dataClause = #acc<data_clause acc_copy> }

  return
}

// CHECK-LABEL: func.func @testdeclareop(
// CHECK-SAME: %[[ARGA:.*]]: memref<f32>, %[[ARGB:.*]]: memref<f32>, %[[ARGC:.*]]: memref<f32>)
// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr(%[[ARGA]] : memref<f32>) -> memref<f32>
// CHECK-NEXT: %[[COPYINRO:.*]] = acc.copyin varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyin_readonly>}
// CHECK-NEXT: %[[COPY:.*]] = acc.copyin varPtr(%[[ARGC]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copy>}
// CHECK-NEXT: acc.declare_enter dataOperands(%[[COPYIN]], %[[COPYINRO]], %[[COPY]] : memref<f32>, memref<f32>, memref<f32>)
// CHECK: %[[CREATE:.*]] = acc.create varPtr(%[[ARGA]] : memref<f32>) -> memref<f32>
// CHECK-NEXT: %[[COPYOUT:.*]] = acc.create varPtr(%[[ARGB]] : memref<f32>) -> memref<f32> {dataClause = #acc<data_clause acc_copyout>}
// CHECK-NEXT: %[[PRESENT:.*]] = acc.present varPtr(%[[ARGC]] : memref<f32>) -> memref<f32>
// CHECK-NEXT: acc.declare_enter dataOperands(%[[CREATE]], %[[COPYOUT]], %[[PRESENT]] : memref<f32>, memref<f32>, memref<f32>)
// CHECK: %[[DEVICEPTR:.*]] = acc.deviceptr varPtr(%[[ARGA]] : memref<f32>) -> memref<f32>
// CHECK-NEXT: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[ARGB]] : memref<f32>) -> memref<f32>
// CHECK-NEXT: %[[LINK:.*]] = acc.declare_link varPtr(%[[ARGC]] : memref<f32>) -> memref<f32>
// CHECK-NEXT: acc.declare_enter dataOperands(%[[DEVICEPTR]], %[[DEVICERES]], %[[LINK]] : memref<f32>, memref<f32>, memref<f32>)
// CHECK: acc.declare_exit dataOperands(%[[DEVICERES]], %[[LINK]] : memref<f32>, memref<f32>)
// CHECK-NEXT: acc.delete accPtr(%[[DEVICERES]] : memref<f32>) {dataClause = #acc<data_clause acc_declare_device_resident>}
// CHECK-NEXT: acc.delete accPtr(%[[LINK]] : memref<f32>) {dataClause = #acc<data_clause acc_declare_link>}
// CHECK: acc.declare_exit dataOperands(%[[CREATE]], %[[COPYOUT]], %[[PRESENT]] : memref<f32>, memref<f32>, memref<f32>)
// CHECK-NEXT: acc.delete accPtr(%[[CREATE]] : memref<f32>) {dataClause = #acc<data_clause acc_create>}
// CHECK-NEXT: acc.copyout accPtr(%[[COPYOUT]] : memref<f32>) to varPtr(%[[ARGB]] : memref<f32>)
// CHECK-NEXT: acc.delete accPtr(%[[PRESENT]] : memref<f32>) {dataClause = #acc<data_clause acc_present>}
// CHECK: acc.declare_exit dataOperands(%[[COPYIN]], %[[COPYINRO]], %[[COPY]] : memref<f32>, memref<f32>, memref<f32>)
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN]] : memref<f32>) {dataClause = #acc<data_clause acc_copyin>}
// CHECK-NEXT: acc.delete accPtr(%[[COPYINRO]] : memref<f32>) {dataClause = #acc<data_clause acc_copyin_readonly>}
// CHECK-NEXT: acc.copyout accPtr(%[[COPY]] : memref<f32>) to varPtr(%[[ARGC]] : memref<f32>) {dataClause = #acc<data_clause acc_copy>}

// -----

// Round-trips acc.global_ctor / acc.global_dtor around a global that carries
// an `acc.declare` attribute.
llvm.mlir.global external @globalvar() { acc.declare = #acc.declare<dataClause = acc_create> } : i32 {
  %0 = llvm.mlir.constant(0 : i32) : i32
  llvm.return %0 : i32
}

acc.global_ctor @acc_constructor {
  %0 = llvm.mlir.addressof @globalvar { acc.declare = #acc.declare<dataClause = acc_create> } : !llvm.ptr
  %1 = acc.create varPtr(%0 : !llvm.ptr) varType(i32) -> !llvm.ptr
  acc.declare_enter dataOperands(%1 : !llvm.ptr)
  acc.terminator
}

acc.global_dtor @acc_destructor {
  %0 = llvm.mlir.addressof @globalvar { acc.declare = #acc.declare<dataClause = acc_create> } : !llvm.ptr
  %1 = acc.getdeviceptr varPtr(%0 : !llvm.ptr) varType(i32) -> !llvm.ptr {dataClause = #acc<data_clause acc_create>}
  acc.declare_exit dataOperands(%1 : !llvm.ptr)
  acc.delete accPtr(%1 : !llvm.ptr)
  acc.terminator
}

// CHECK-LABEL: acc.global_ctor @acc_constructor
// CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @globalvar {acc.declare = #acc.declare<dataClause = acc_create>} : !llvm.ptr
// CHECK-NEXT: %[[CREATE:.*]] = acc.create varPtr(%[[ADDR]] : !llvm.ptr) varType(i32) -> !llvm.ptr
// CHECK-NEXT: acc.declare_enter dataOperands(%[[CREATE]] : !llvm.ptr)
// CHECK: acc.global_dtor @acc_destructor
// CHECK: %[[ADDR:.*]] = llvm.mlir.addressof @globalvar {acc.declare = #acc.declare<dataClause = acc_create>} : !llvm.ptr
// CHECK-NEXT: %[[DELETE:.*]] = acc.getdeviceptr varPtr(%[[ADDR]] : !llvm.ptr) varType(i32) -> !llvm.ptr {dataClause = #acc<data_clause acc_create>}
// CHECK-NEXT: acc.declare_exit dataOperands(%[[DELETE]] : !llvm.ptr)
// CHECK-NEXT: acc.delete accPtr(%[[DELETE]] : !llvm.ptr)

// -----

// Round-trips the `acc.routine_info` function attribute and acc.routine ops
// with bind / gang / vector / worker / seq / implicit / nohost / gang(dim).
func.func @acc_func(%a : i64) -> () attributes {acc.routine_info = #acc.routine_info<[@acc_func_rout1,@acc_func_rout2,@acc_func_rout3,
    @acc_func_rout4,@acc_func_rout5,@acc_func_rout6,@acc_func_rout7,@acc_func_rout8,@acc_func_rout9]>} {
  return
}

acc.routine @acc_func_rout1 func(@acc_func)
acc.routine @acc_func_rout2 func(@acc_func) bind("acc_func_gpu")
acc.routine @acc_func_rout3 func(@acc_func) bind("acc_func_gpu_gang") gang
acc.routine @acc_func_rout4 func(@acc_func) bind("acc_func_gpu_vector") vector
acc.routine @acc_func_rout5 func(@acc_func) bind("acc_func_gpu_worker") worker
acc.routine @acc_func_rout6 func(@acc_func) bind("acc_func_gpu_seq") seq
acc.routine @acc_func_rout7 func(@acc_func) bind("acc_func_gpu_imp_gang") implicit gang
acc.routine @acc_func_rout8 func(@acc_func) bind("acc_func_gpu_vector_nohost") vector nohost
acc.routine @acc_func_rout9 func(@acc_func) bind("acc_func_gpu_gang_dim1") gang(dim: 1 : i64)

// The input for rout7 is written `implicit gang`; the expected output below
// is `gang implicit`.
// CHECK-LABEL: func.func @acc_func(
// CHECK: attributes {acc.routine_info = #acc.routine_info<[@acc_func_rout1, @acc_func_rout2, @acc_func_rout3,
// CHECK: @acc_func_rout4, @acc_func_rout5, @acc_func_rout6, @acc_func_rout7, @acc_func_rout8, @acc_func_rout9]>}
// CHECK: acc.routine @acc_func_rout1 func(@acc_func)
// CHECK: acc.routine @acc_func_rout2 func(@acc_func) bind("acc_func_gpu")
// CHECK: acc.routine @acc_func_rout3 func(@acc_func) bind("acc_func_gpu_gang") gang
// CHECK: acc.routine @acc_func_rout4 func(@acc_func) bind("acc_func_gpu_vector") vector
// CHECK: acc.routine @acc_func_rout5 func(@acc_func) bind("acc_func_gpu_worker") worker
// CHECK: acc.routine @acc_func_rout6 func(@acc_func) bind("acc_func_gpu_seq") seq
// CHECK: acc.routine @acc_func_rout7 func(@acc_func) bind("acc_func_gpu_imp_gang") gang implicit
// CHECK: acc.routine @acc_func_rout8 func(@acc_func) bind("acc_func_gpu_vector_nohost") vector nohost
// CHECK: acc.routine @acc_func_rout9 func(@acc_func) bind("acc_func_gpu_gang_dim1") gang(dim: 1 : i64)

// -----

// Round-trips the `acc.declare_action` attribute attached to an op from the
// test dialect.
func.func @acc_func() -> () {
  "test.openacc_dummy_op"() {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc>} : () -> ()
  return
}

// CHECK-LABEL: func.func @acc_func
// CHECK: "test.openacc_dummy_op"() {acc.declare_action = #acc.declare_action<postAlloc = @_QMacc_declareFacc_declare_allocateEa_acc_declare_update_desc_post_alloc>}

// -----

// Round-trips the structured acc.declare op with memref operands and one
// !llvm.ptr operand.
func.func @compute3(%a: memref<10x10xf32>, %b: memref<10x10xf32>, %c: memref<10xf32>, %d: memref<10xf32>) {
  %lb = arith.constant 0 : index
  %st = arith.constant 1 : index
  %c10 = arith.constant 10 : index
  %numGangs = arith.constant 10 : i64
  %numWorkers = arith.constant 10 : i64

  %c20 = arith.constant 20 : i32
  %alloc = llvm.alloca %c20 x i32 { acc.declare = #acc.declare<dataClause = acc_create, implicit = true> } : (i32) -> !llvm.ptr
  %createlocal = acc.create varPtr(%alloc : !llvm.ptr) varType(!llvm.array<20 x i32>) -> !llvm.ptr {implicit = true}

  %pa = acc.present varPtr(%a : memref<10x10xf32>) varType(tensor<10x10xf32>) -> memref<10x10xf32>
  %pb = acc.present varPtr(%b : memref<10x10xf32>) varType(tensor<10x10xf32>) -> memref<10x10xf32>
  %pc = acc.present varPtr(%c : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
  %pd = acc.present varPtr(%d : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
  acc.declare dataOperands(%pa, %pb, %pc, %pd, %createlocal: memref<10x10xf32>, memref<10x10xf32>, memref<10xf32>, memref<10xf32>, !llvm.ptr) {
  }

  return
}

// CHECK-LABEL: func.func @compute3
// CHECK: acc.declare dataOperands(

// -----

// Round-trips acc.set with a device_type attribute, device_num operands of
// type i64 / i32 / index, an `if` condition, and default_async.
%i64Value = arith.constant 1 : i64
%i32Value = arith.constant 1 : i32
%i32Value2 = arith.constant 2 : i32
%idxValue = arith.constant 1 : index
%ifCond = arith.constant true
acc.set attributes {device_type = #acc.device_type<nvidia>}
acc.set device_num(%i64Value : i64)
acc.set device_num(%i32Value : i32)
acc.set device_num(%idxValue : index)
acc.set device_num(%idxValue : index) if(%ifCond)
acc.set default_async(%i32Value : i32)

// CHECK: [[I64VALUE:%.*]] = arith.constant 1 : i64
// CHECK: [[I32VALUE:%.*]] = arith.constant 1 : i32
// CHECK: [[I32VALUE2:%.*]] = arith.constant 2 : i32
// CHECK: [[IDXVALUE:%.*]] = arith.constant 1 : index
// CHECK: [[IFCOND:%.*]] = arith.constant true
// CHECK: acc.set attributes {device_type = #acc.device_type<nvidia>}
// CHECK: acc.set device_num([[I64VALUE]] : i64)
// CHECK: acc.set device_num([[I32VALUE]] : i32)
// CHECK: acc.set device_num([[IDXVALUE]] : index)
// CHECK: acc.set device_num([[IDXVALUE]] : index) if([[IFCOND]])
// CHECK: acc.set default_async([[I32VALUE]] : i32)

// -----

// CHECK-LABEL: func.func @acc_atomic_read
// CHECK-SAME: (%[[v:.*]]: memref<i32>, %[[x:.*]]: memref<i32>)
func.func @acc_atomic_read(%v: memref<i32>, %x: memref<i32>) {
  // CHECK: acc.atomic.read %[[v]] = %[[x]] : memref<i32>, memref<i32>, i32
  acc.atomic.read %v = %x : memref<i32>, memref<i32>, i32
  return
}

// -----

// CHECK-LABEL: func.func @acc_atomic_write
// CHECK-SAME: (%[[ADDR:.*]]: memref<i32>, %[[VAL:.*]]: i32)
func.func @acc_atomic_write(%addr : memref<i32>, %val : i32) {
  // CHECK: acc.atomic.write %[[ADDR]] = %[[VAL]] : memref<i32>, i32
  acc.atomic.write %addr = %val : memref<i32>, i32
  return
}

// -----

// Round-trips acc.atomic.update with several update-region bodies: add, and,
// shl, smax, icmp, yielding the block argument unchanged, and yielding a
// value defined outside the region.
// CHECK-LABEL: func.func @acc_atomic_update
// CHECK-SAME: (%[[X:.*]]: memref<i32>, %[[EXPR:.*]]: i32, %[[XBOOL:.*]]: memref<i1>, %[[EXPRBOOL:.*]]: i1)
func.func @acc_atomic_update(%x : memref<i32>, %expr : i32, %xBool : memref<i1>, %exprBool : i1) {
  // CHECK: acc.atomic.update %[[X]] : memref<i32>
  // CHECK-NEXT: (%[[XVAL:.*]]: i32):
  // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.add %[[XVAL]], %[[EXPR]] : i32
  // CHECK-NEXT: acc.yield %[[NEWVAL]] : i32
  acc.atomic.update %x : memref<i32> {
  ^bb0(%xval: i32):
    %newval = llvm.add %xval, %expr : i32
    acc.yield %newval : i32
  }
  // CHECK: acc.atomic.update %[[XBOOL]] : memref<i1>
  // CHECK-NEXT: (%[[XVAL:.*]]: i1):
  // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.and %[[XVAL]], %[[EXPRBOOL]] : i1
  // CHECK-NEXT: acc.yield %[[NEWVAL]] : i1
  acc.atomic.update %xBool : memref<i1> {
  ^bb0(%xval: i1):
    %newval = llvm.and %xval, %exprBool : i1
    acc.yield %newval : i1
  }
  // CHECK: acc.atomic.update %[[X]] : memref<i32>
  // CHECK-NEXT: (%[[XVAL:.*]]: i32):
  // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.shl %[[XVAL]], %[[EXPR]] : i32
  // CHECK-NEXT: acc.yield %[[NEWVAL]] : i32
  // CHECK-NEXT: }
  acc.atomic.update %x : memref<i32> {
  ^bb0(%xval: i32):
    %newval = llvm.shl %xval, %expr : i32
    acc.yield %newval : i32
  }
  // CHECK: acc.atomic.update %[[X]] : memref<i32>
  // CHECK-NEXT: (%[[XVAL:.*]]: i32):
  // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.intr.smax(%[[XVAL]], %[[EXPR]]) : (i32, i32) -> i32
  // CHECK-NEXT: acc.yield %[[NEWVAL]] : i32
  // CHECK-NEXT: }
  acc.atomic.update %x : memref<i32> {
  ^bb0(%xval: i32):
    %newval = llvm.intr.smax(%xval, %expr) : (i32, i32) -> i32
    acc.yield %newval : i32
  }

  // CHECK: acc.atomic.update %[[XBOOL]] : memref<i1>
  // CHECK-NEXT: (%[[XVAL:.*]]: i1):
  // CHECK-NEXT: %[[NEWVAL:.*]] = llvm.icmp "eq" %[[XVAL]], %[[EXPRBOOL]] : i1
  // CHECK-NEXT: acc.yield %[[NEWVAL]] : i1
  // CHECK-NEXT: }
  acc.atomic.update %xBool : memref<i1> {
  ^bb0(%xval: i1):
    %newval = llvm.icmp "eq" %xval, %exprBool : i1
    acc.yield %newval : i1
  }

  // CHECK: acc.atomic.update %[[X]] : memref<i32> {
  // CHECK-NEXT: (%[[XVAL:.*]]: i32):
  // CHECK-NEXT: acc.yield %[[XVAL]] : i32
  // CHECK-NEXT: }
  acc.atomic.update %x : memref<i32> {
  ^bb0(%xval:i32):
    acc.yield %xval : i32
  }

  // CHECK: acc.atomic.update %[[X]] : memref<i32> {
  // CHECK-NEXT: (%[[XVAL:.*]]: i32):
  // CHECK-NEXT: acc.yield %{{.+}} : i32
  // CHECK-NEXT: }
  %const = arith.constant 42 : i32
  acc.atomic.update %x : memref<i32> {
  ^bb0(%xval:i32):
    acc.yield %const : i32
  }

  return
}

// -----

// Round-trips acc.atomic.capture with the three op pairs used below:
// update + read, read + update, and read + write.
// CHECK-LABEL: func.func @acc_atomic_capture
// CHECK-SAME: (%[[v:.*]]: memref<i32>, %[[x:.*]]: memref<i32>, %[[expr:.*]]: i32)
func.func @acc_atomic_capture(%v: memref<i32>, %x: memref<i32>, %expr: i32) {
  // CHECK: acc.atomic.capture {
  // CHECK-NEXT: acc.atomic.update %[[x]] : memref<i32>
  // CHECK-NEXT: (%[[xval:.*]]: i32):
  // CHECK-NEXT: %[[newval:.*]] = llvm.add %[[xval]], %[[expr]] : i32
  // CHECK-NEXT: acc.yield %[[newval]] : i32
  // CHECK-NEXT: }
  // CHECK-NEXT: acc.atomic.read %[[v]] = %[[x]] : memref<i32>, memref<i32>, i32
  // CHECK-NEXT: }
  acc.atomic.capture {
    acc.atomic.update %x : memref<i32> {
    ^bb0(%xval: i32):
      %newval = llvm.add %xval, %expr : i32
      acc.yield %newval : i32
    }
    acc.atomic.read %v = %x : memref<i32>, memref<i32>, i32
  }
  // CHECK: acc.atomic.capture {
  // CHECK-NEXT: acc.atomic.read %[[v]] = %[[x]] : memref<i32>, memref<i32>, i32
  // CHECK-NEXT: acc.atomic.update %[[x]] : memref<i32>
  // CHECK-NEXT: (%[[xval:.*]]: i32):
  // CHECK-NEXT: %[[newval:.*]] = llvm.add %[[xval]], %[[expr]] : i32
  // CHECK-NEXT: acc.yield %[[newval]] : i32
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  acc.atomic.capture {
    acc.atomic.read %v = %x : memref<i32>, memref<i32>, i32
    acc.atomic.update %x : memref<i32> {
    ^bb0(%xval: i32):
      %newval = llvm.add %xval, %expr : i32
      acc.yield %newval : i32
    }
  }
  // CHECK: acc.atomic.capture {
  // CHECK-NEXT: acc.atomic.read %[[v]] = %[[x]] : memref<i32>, memref<i32>, i32
  // CHECK-NEXT: acc.atomic.write %[[x]] = %[[expr]] : memref<i32>, i32
  // CHECK-NEXT: }
  acc.atomic.capture {
    acc.atomic.read %v = %x : memref<i32>, memref<i32>, i32
    acc.atomic.write %x = %expr : memref<i32>, i32
  }

  return
}

// -----

// Round-trips the num_gangs clause with per-device_type operand groups.
// CHECK-LABEL: func.func @acc_num_gangs
func.func @acc_num_gangs() {
  %c2 = arith.constant 2 : i32
  %c1 = arith.constant 1 : i32
  acc.parallel num_gangs({%c2 : i32} [#acc.device_type<default>], {%c1 : i32, %c1 : i32, %c1 : i32} [#acc.device_type<nvidia>]) {
  }

  return
}

// The constants are captured rather than matched by their printed SSA names,
// so the test does not depend on how the printer names values.
// CHECK: %[[C2:.*]] = arith.constant 2 : i32
// CHECK: %[[C1:.*]] = arith.constant 1 : i32
// CHECK: acc.parallel num_gangs({%[[C2]] : i32} [#acc.device_type<default>], {%[[C1]] : i32, %[[C1]] : i32, %[[C1]] : i32} [#acc.device_type<nvidia>])

// -----

// Round-trips the `combined(...)` marker on compute constructs and on the
// acc.loop nested in each of them.
// CHECK-LABEL: func.func @acc_combined
func.func @acc_combined() {
  acc.parallel combined(loop) {
    acc.loop combined(parallel) {
      acc.yield
    }
    acc.terminator
  }

  acc.kernels combined(loop) {
    acc.loop combined(kernels) {
      acc.yield
    }
    acc.terminator
  }

  acc.serial combined(loop) {
    acc.loop combined(serial) {
      acc.yield
    }
    acc.terminator
  }

  return
}

// CHECK: acc.parallel combined(loop)
// CHECK: acc.loop combined(parallel)
// CHECK: acc.kernels combined(loop)
// CHECK: acc.loop combined(kernels)
// CHECK: acc.serial combined(loop)
// CHECK: acc.loop combined(serial)

// Firstprivate recipe on memref<i32>: `init` allocates, `copy` loads from the
// second argument and stores into the first.
acc.firstprivate.recipe @firstprivatization_memref_i32 : memref<i32> init {
^bb0(%arg0: memref<i32>):
  %alloca = memref.alloca() : memref<i32>
  acc.yield %alloca : memref<i32>
} copy {
^bb0(%arg0: memref<i32>, %arg1: memref<i32>):
  %0 = memref.load %arg1[] : memref<i32>
  memref.store %0, %arg0[] : memref<i32>
  acc.terminator
}

// CHECK-LABEL: acc.firstprivate.recipe @firstprivatization_memref_i32
// CHECK: memref.alloca

// Reduction recipe on memref<i32>: `init` allocates and stores 0, `combiner`
// adds the two loaded values and stores the sum into the first argument.
acc.reduction.recipe @reduction_add_memref_i32 : memref<i32> reduction_operator <add> init {
^bb0(%arg0: memref<i32>):
  %c0_i32 = arith.constant 0 : i32
  %alloca = memref.alloca() : memref<i32>
  memref.store %c0_i32, %alloca[] : memref<i32>
  acc.yield %alloca : memref<i32>
} combiner {
^bb0(%arg0: memref<i32>, %arg1: memref<i32>):
  %0 = memref.load %arg0[] : memref<i32>
  %1 = memref.load %arg1[] : memref<i32>
  %2 = arith.addi %0, %1 : i32
  memref.store %2, %arg0[] : memref<i32>
  acc.yield %arg0 : memref<i32>
}

// CHECK-LABEL: acc.reduction.recipe @reduction_add_memref_i32
// CHECK: memref.alloca

// Private recipe on memref<i32> with only an `init` region.
acc.private.recipe @privatization_memref_i32 : memref<i32> init {
^bb0(%arg0: memref<i32>):
  %alloca = memref.alloca() : memref<i32>
  acc.yield %alloca : memref<i32>
}

// CHECK-LABEL: acc.private.recipe @privatization_memref_i32
// CHECK: memref.alloca