! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s

! Test CUDA Fortran procedures available in cudadevice module

! Kernel that calls each synchronization, fence, and atomic procedure once so
! that the lowering of every call can be matched by the directives that follow
! the subroutine. The locals are intentionally left uninitialized: this file is
! only lowered and pattern-matched, never executed.
attributes(global) subroutine devsub()
  implicit none
  integer :: ret
  real(4) :: af
  real(8) :: ad
  integer(4) :: ai
  integer(8) :: al

  ! Barriers and memory fences.
  call syncthreads()
  call syncwarp(1)
  call threadfence()
  call threadfence_block()
  call threadfence_system()

  ! Barriers that also return a value.
  ret = syncthreads_and(1)
  ret = syncthreads_count(1)
  ret = syncthreads_or(1)

  ! Atomic add, once per declared kind: integer(4), integer(8), real(4), real(8).
  ai = atomicadd(ai, 1_4)
  al = atomicadd(al, 1_8)
  af = atomicadd(af, 1.0_4)
  ad = atomicadd(ad, 1.0_8)

  ! Atomic subtract, same four kinds.
  ai = atomicsub(ai, 1_4)
  al = atomicsub(al, 1_8)
  af = atomicsub(af, 1.0_4)
  ad = atomicsub(ad, 1.0_8)

  ! Atomic maximum, same four kinds.
  ai = atomicmax(ai, 1_4)
  al = atomicmax(al, 1_8)
  af = atomicmax(af, 1.0_4)
  ad = atomicmax(ad, 1.0_8)

  ! Atomic minimum, same four kinds.
  ai = atomicmin(ai, 1_4)
  al = atomicmin(al, 1_8)
  af = atomicmin(af, 1.0_4)
  ad = atomicmin(ad, 1.0_8)

  ! Bitwise and wrapping atomics, exercised on integer(4) only.
  ai = atomicand(ai, 1_4)
  ai = atomicor(ai, 1_4)
  ai = atomicinc(ai, 1_4)
  ai = atomicdec(ai, 1_4)
end subroutine devsub

! Expected lowering of the kernel body, in source order.
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: fir.call @llvm.nvvm.barrier0() fastmath<contract> : () -> ()
! CHECK: fir.call @__syncwarp(%{{.*}}) proc_attrs<bind_c> fastmath<contract> : (!fir.ref<i32>) -> ()
! CHECK: fir.call @llvm.nvvm.membar.gl() fastmath<contract> : () -> ()
! CHECK: fir.call @llvm.nvvm.membar.cta() fastmath<contract> : () -> ()
! CHECK: fir.call @llvm.nvvm.membar.sys() fastmath<contract> : () -> ()

! The constant-1 operand is matched by its name prefix only, so the numeric
! suffix the printer appends to distinguish constants may change freely.
! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%c1{{.*}}) fastmath<contract> : (i32) -> i32
! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.popc(%c1{{.*}}) fastmath<contract> : (i32) -> i32
! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.or(%c1{{.*}}) fastmath<contract> : (i32) -> i32

! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw _and %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw _or %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32

! Declarations emitted for the functions called through fir.call above.
! CHECK: func.func private @llvm.nvvm.barrier0()
! CHECK: func.func private @__syncwarp(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs<bind_c>}
! CHECK: func.func private @llvm.nvvm.membar.gl()
! CHECK: func.func private @llvm.nvvm.membar.cta()
! CHECK: func.func private @llvm.nvvm.membar.sys()
! CHECK: func.func private @llvm.nvvm.barrier0.and(i32) -> i32
! CHECK: func.func private @llvm.nvvm.barrier0.popc(i32) -> i32
! CHECK: func.func private @llvm.nvvm.barrier0.or(i32) -> i32